# Crop data sharing with AgML

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

def fnid_to_adm_id(sr_fnid):
    """Convert FNID strings to the shorter ``CC-AL-NNNN`` admin-unit ID.

    The ID is assembled from three fixed slices of each FNID:
    characters 0-1 (country code), characters 6-7 (admin level) and
    everything from character 8 onwards (unit number, left-padded with
    zeros to at least four characters). Characters 2-5 are dropped.

    Examples:
        LS1980A104   -> LS-A1-0004
        ET2019A20604 -> ET-A2-0604
        SN2008A21702 -> SN-A2-1702

    Args:
        sr_fnid: pandas Series of FNID strings.

    Returns:
        A new Series with the same index and name as ``sr_fnid``.
        Missing FNIDs stay missing in the result instead of raising.
    """
    # Vectorized string accessors replace the per-element lambdas and
    # pass missing values through untouched.
    country_code = sr_fnid.str.slice(0, 2)
    admin_level = sr_fnid.str.slice(6, 8)
    short_id = sr_fnid.str.slice(8).str.zfill(4)
    # str.cat builds a fresh Series, so no defensive copy is needed.
    return country_code.str.cat([admin_level, short_id], sep='-')

# Crop production data -------------------------------------- #
# Countries kept in the shared dataset
countries = [
    'Angola', 'Burkina Faso', 'Lesotho', 'Madagascar', 'Malawi', 'Mozambique',
    'South Africa', 'Zambia', 'Ethiopia', 'Sudan', 'Chad', 'Niger', 'Senegal',
]
# Read the raw crop production table
df = pd.read_csv('../public/hvstat_data.csv', low_memory=False)
# Relabel 'Maize Grain (White)' as 'Maize' (noted originally as the Mozambique maize label)
df['product'] = df['product'].replace('Maize Grain (White)', 'Maize')
# Keep only maize rows from the selected countries
df = df[df['country'].isin(countries) & df['product'].eq('Maize')]
# Remove the crop production systems that are not needed
df = df[~df['crop_production_system'].isin(
    ['Rainfed (PS)', 'Plaine/Bas-fond irrigated (PS)', 'irrigated', 'recessional (PS)']
)]
# Remove the seasons that are not needed
df = df[~df['season_name'].isin(['Winter', 'Main-off', 'Deyr-off', 'Gu-off'])]
# Keep the shared columns, renumber the rows and switch to the shared column names
df = (
    df[[
        'fnid', 'country', 'country_code', 'product', 'season_name',
        'planting_month', 'planting_year', 'harvest_month', 'harvest_year',
        'indicator', 'value',
    ]]
    .reset_index(drop=True)
    .rename(columns={'fnid': 'adm_id', 'product': 'crop_name'})
)
# Replace the FNID values with the shortened admin-unit IDs
df['adm_id'] = fnid_to_adm_id(df['adm_id'])
# ----------------------------------------------------------- #

# Shapefile data -------------------------------------------- #
shape = gpd.read_file('../public/hvstat_shape.gpkg')
# Keep units of the selected countries whose ADMIN2 is not the empty string
# (per the original note, this drops the level 0 country boundaries)
in_selected_country = shape['ADMIN0'].isin(countries)
has_admin2 = shape['ADMIN2'].ne('')
shape = shape[in_selected_country & has_admin2]
# Switch to the shared column names and renumber the rows
shape = shape.rename(
    columns={
        'FNID': 'adm_id',
        'ADMIN0': 'admin0',
        'ADMIN1': 'admin1',
        'ADMIN2': 'admin2',
    }
).reset_index(drop=True)
# Replace the FNID values with the shortened admin-unit IDs
shape['adm_id'] = fnid_to_adm_id(shape['adm_id'])
# ----------------------------------------------------------- #

# # Save the data --------------------------------------------- #
# version = '0.1'
# fn_out = '../public/Africa_crop_production_AgML_v'+version+'.csv'
# df.to_csv(fn_out); print('%s is saved.' % fn_out)
# fn_out = '../public/adm_shapefile_AgML_v'+version+'.shp'
# shape.to_file(fn_out); print('%s is saved.' % fn_out)
# # ----------------------------------------------------------- #

In [3]:
# Show each distinct (country, season_name) pair remaining in the filtered table
df.loc[:, ['country', 'season_name']].drop_duplicates()

Unnamed: 0,country,season_name
0,Angola,Main
1134,Burkina Faso,Main
4700,Ethiopia,Meher
7875,Lesotho,Summer
9054,Madagascar,Annual
10508,Malawi,Annual
10808,Mozambique,Main
11416,Niger,Main
13144,Senegal,Main
17477,Chad,Main
