# Calculate yield anomalies and plot them
- Script to take the processed crop yield data and calculate crop yield anomalies. The script calculates three different types of anomalies (relative to a Gaussian-filtered, a 5-point moving-average, and a 9-point moving-average expected yield) to enable sensitivity analyses, and writes out new objects accordingly.
- In a second step, these yield anomalies are written out to gridded netCDF objects for ease of use in other applications, as this is the standard format for other gridded yield products.


In [1]:
from scipy import signal
from scipy import ndimage
import netCDF4
import os, json
import warnings
import regionmask
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_hdf
from tools import CreateLinkAdmin
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
#define a moving average function
def moving_average(a, smooth):
    """Running mean of `a` over windows of 2*smooth+1 consecutive samples.

    a      : sequence of numbers (np.cumsum flattens it if it is not 1-D)
    smooth : half-width of the window, in samples

    Returns a float array holding one mean per *complete* window, i.e.
    len(a) - 2*smooth values (empty when `a` is shorter than one window).
    """
    window = 2 * smooth + 1
    csum = np.cumsum(a, dtype=float)
    #turn the running total into a per-window total:
    #sum(a[i-window+1 : i+1]) == csum[i] - csum[i-window]
    lagged = csum[:-window]
    csum[window:] = csum[window:] - lagged
    #entries before index window-1 do not cover a full window, so drop them
    return csum[window - 1:] / window

In [3]:
#read in the crop yield data (read once; the table is merged in place below)
df = pd.read_hdf('./data/crop/adm_crop_production_ALL.hdf')
#read in the FEWS admins and reproject them to EPSG:4326
shape = gpd.read_file('./data/shapefile/adm_current.shp').to_crs("EPSG:4326")
#merge the two on the FNID. This is pandas' default inner join, so yield records
#whose fnid has no match in the shapefile are dropped.
df = df.merge(shape[['FNID','ADMIN0','ADMIN1','ADMIN2']], left_on='fnid', right_on='FNID')
df = df.rename(columns={'ADMIN1':'admin1','ADMIN2':'admin2','season_name':'season'})
#keep a fixed subset of columns, in this order
df = df[['fnid','country','admin1','admin2','product','season','harvest_month','harvest_year','indicator','value']]
#'admin' is characters 2-7 of the FNID, e.g. 'SO1990A21301' -> '1990A2'
df['admin'] = df['fnid'].str[2:8]

### Calculate anomalies and grid
Below is the main portion of the script, where anomalies are calculated and written out both as a table and as a gridded netCDF product.

In [4]:
#use FNIDs and years to create an empty (all-NaN) dataframe:
#one row per FNID, one column per harvest year
fnids = df.fnid.unique()
yrs = np.sort(df.harvest_year.unique())
dfEmpty = pd.DataFrame(columns = yrs, index=fnids,dtype=float)

#get a table of FNIDs and geometries, indexed by FNID
fnGeos = shape[['FNID','geometry']].drop_duplicates()
fnGeos.set_index('FNID',inplace=True)

#make an empty grid: 0.1 degree spacing, longitude ascending, latitude descending
lon = np.arange(-20,55,0.1)
lat = np.arange(40,-40,-0.1)

#for each gridded crop: country -> [product name, season name] used to select rows of df
cps = {
    'maize':{'Somalia':['Maize','Gu'],
            'Malawi':['Maize','Main'],
            'Kenya':['Maize','Long'],
            'Burkina Faso':['Maize','Main'],
            'Mali':['Maize','Main'],
            'Chad':['Maize','Main'],
            'South Africa':['Maize (Yellow)','Summer'],
            'Niger':['Maize','Main season'],
            'Zambia':['Maize','Annual']},
    'wheat':{'Kenya':['Wheat','Annual'],
            'Mali':['Wheat','Main'],
            'Chad':['Wheat','Main'],
            'South Africa':['Wheat','Winter']}
}

#output tables (FNID x year):
#  dfYld       observed yield
#  dfYldGau    fractional anomaly vs. Gaussian-filtered (sigma=3) expected yield
#  dfYldGauAbs absolute anomaly vs. the same Gaussian-filtered expected yield
#  dfYldSm5    fractional anomaly vs. 5-point moving-average expected yield
#  dfYldSm9    fractional anomaly vs. 9-point moving-average expected yield
#NOTE(review): these tables are shared by both crops, so a FNID that reports both
#maize and wheat ends up with the wheat values overwriting the maize values for the
#years wheat covers -- confirm this is what downstream code expects.
dfYld = dfEmpty.copy()
dfYldGau = dfEmpty.copy()
dfYldGauAbs = dfEmpty.copy()
dfYldSm5 = dfEmpty.copy()
dfYldSm9 = dfEmpty.copy()

#NOTE(review): machine-specific absolute path; the inputs are read from ./data/...,
#so this probably should be relative as well -- left unchanged pending confirmation.
griddedDir = '/Users/wanders7/Documents/Code/Project/NASA_GSCD/gscd/data/gridded/'

for icr in ['maize','wheat']:
    ncPath = griddedDir+icr+'.nc'

    #gridded anomalies for this crop, NaN wherever no region supplies a value
    yld = np.full([yrs.size,lat.size,lon.size],np.nan)
    yldAbs = np.full([yrs.size,lat.size,lon.size],np.nan)

    for icx, (ipx, isx) in cps[icr].items():
        #select this country/product/season once instead of once per FNID
        cropDf = df.loc[(df['product']==ipx)&(df.country==icx)&(df.season==isx)]
        ifnds = cropDf.fnid.unique()
        cropYld = cropDf.loc[cropDf.indicator=='yield']

        for ifnx in ifnds:
            ylds = cropYld.loc[cropYld.fnid==ifnx].sort_values(by='harvest_year')
            if ylds.value.size<10:
                print('fewer than 10 years for '+ifnx+', '+ipx+', '+isx)
                continue

            years = ylds.harvest_year.values
            obs = ylds.value.values.astype(float)
            missing = np.isnan(obs)

            #single .loc[row, cols] assignment: chained df.loc[row][cols] = ... only
            #works when pandas happens to hand back a view
            dfYld.loc[ifnx, years] = obs

            #To account for missing values we zero them out and re-weight each
            #smoothing window by the number of non-missing values it contains
            yldCount = (~missing).astype(float)
            iYld = np.where(missing,0.0,obs)

            #divide by the count to re-weight based on the # of values going into each smoothing filter.
            #A window made up entirely of missing values gives 0/0; its centre value is
            #itself missing and is reset to NaN below, so that warning is silenced here.
            with np.errstate(divide='ignore',invalid='ignore'):
                yldSm9Exp = moving_average(iYld,4)/moving_average(yldCount,4)
                yldSm5Exp = moving_average(iYld,2)/moving_average(yldCount,2)
                yldGauExp = ndimage.gaussian_filter1d(iYld,3)/ndimage.gaussian_filter1d(yldCount,3)

            #absolute yield anom = yld obs - yld exp
            stYldGauAbs = iYld-yldGauExp
            #percent yield anom = (yld obs - yld exp)/yld exp  (stored as a fraction, not x100)
            stYldGau = (iYld-yldGauExp)/yldGauExp
            #the moving averages only return full windows, so trim the observations to match
            stYldSm9 = (iYld[4:-4]-yldSm9Exp)/yldSm9Exp
            stYldSm5 = (iYld[2:-2]-yldSm5Exp)/yldSm5Exp

            #add nans back in
            stYldGau[missing]=np.nan
            stYldGauAbs[missing]=np.nan
            stYldSm5[missing[2:-2]]=np.nan
            stYldSm9[missing[4:-4]]=np.nan

            #write the anomalies into the tables. The moving-average anomalies only exist
            #for the interior years, so the year labels are trimmed rather than slicing a
            #temporary copy of the row (which never reached dfYldSm5/dfYldSm9).
            dfYldGauAbs.loc[ifnx, years] = stYldGauAbs
            dfYldGau.loc[ifnx, years] = stYldGau
            dfYldSm5.loc[ifnx, years[2:-2]] = stYldSm5
            dfYldSm9.loc[ifnx, years[4:-4]] = stYldSm9

            #full-length (all years) series for this region and this crop only. Built
            #from scratch so that values another crop left in the shared tables cannot
            #leak into this crop's grid.
            yrPos = np.searchsorted(yrs,years)
            gridPct = np.full(yrs.size,np.nan)
            gridAbs = np.full(yrs.size,np.nan)
            gridPct[yrPos] = stYldGau
            gridAbs[yrPos] = stYldGauAbs

            #find the location mask: the single region is numbered 0, so cells equal to 0 lie inside it
            msk = np.array(regionmask.Regions([fnGeos.loc[ifnx].geometry]).mask(lon,lat))
            inRegion = msk==0
            #assign the yield time series to the relevant grid points based on the mask
            yld[:,inRegion] = gridPct[:,None]
            yldAbs[:,inRegion] = gridAbs[:,None]

    #write the yield objects into the netCDF; the context manager closes the file
    #even if one of the writes fails
    with netCDF4.Dataset(ncPath,'w',format='NETCDF4') as ncf:
        #create dimensions
        ncf.createDimension('lon',lon.size)
        ncf.createDimension('lat',lat.size)
        ncf.createDimension('yr',yrs.size)
        #create coordinate variables (dimensions passed as real one-element tuples)
        ncf.createVariable('latitude','f4',('lat',))
        ncf.createVariable('longitude','f4',('lon',))
        ncf.createVariable('year','i8',('yr',))
        #fill variables lat/lon/year
        ncf.variables['latitude'][:]=lat
        ncf.variables['longitude'][:]=lon
        ncf.variables['year'][:]=yrs
        #create and fill the yield variables
        ncf.createVariable('yield_pct','f4',('yr','lat','lon'))
        ncf.createVariable('yield_abs','f4',('yr','lat','lon'))
        ncf['yield_pct'][:] = yld
        ncf['yield_abs'][:] = yldAbs
    

fewer than 10 years for SO1990A21301, Maize, Gu
fewer than 10 years for SO1990A22505, Maize, Gu
fewer than 10 years for SO1990A22502, Maize, Gu


  msk = np.array(regionmask.Regions([fnGeos.loc[ifnx].geometry]).mask(lon,lat))


fewer than 10 years for KE2013A147, Maize, Long
fewer than 10 years for ML2001A107, Maize, Main
fewer than 10 years for TD2012A102, Maize, Main
fewer than 10 years for NE2012A20204, Maize, Main season
fewer than 10 years for NE2012A20205, Maize, Main season
fewer than 10 years for NE2012A20210, Maize, Main season
fewer than 10 years for NE2012A20407, Maize, Main season
fewer than 10 years for NE2012A20516, Maize, Main season
fewer than 10 years for NE2012A20519, Maize, Main season
fewer than 10 years for NE2012A20603, Maize, Main season
fewer than 10 years for NE2012A20607, Maize, Main season
fewer than 10 years for NE2012A20611, Maize, Main season
fewer than 10 years for NE2012A20614, Maize, Main season
fewer than 10 years for NE2012A20616, Maize, Main season
fewer than 10 years for NE2012A20710, Maize, Main season
fewer than 10 years for NE2012A20711, Maize, Main season
fewer than 10 years for NE2012A20712, Maize, Main season
fewer than 10 years for NE2012A20715, Maize, Main season
f