# HarvestStat Data Profiling - Uganda

In [1]:
import glob, json
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from tools import save_npz, PrintAdminUnits, PlotAdminShapes
from tools import FDW_PD_Sweeper, FDW_PD_AvalTable, FDW_PD_Compiling, FDW_PD_ValidateFnidName
from tools import FDW_PD_CreateAdminLink, FDW_PD_RatioAdminLink, FDW_PD_ConnectAdminLink
from tools import FDW_PD_CaliSeasonYear
from tools_graphic import PlotBarProduction, PlotLinePAY
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.options.mode.chained_assignment = None

In [2]:
# Load FEWS NET administrative boundaries ------- #
epsg = 'epsg:32736' # Uganda
fn_shapes = sorted(glob.glob('../data/shapefile/fewsnet/UG_Admin?_????.shp'))
shape_all = []
for fn in fn_shapes:
    name = fn[-18:-4]
    exec('%s = gpd.read_file("%s").to_crs("%s")' % (name, fn, epsg))
    exec('%s["area"] = %s["geometry"].area/10**6' % (name, name))
    exec('shape_all.append(%s)' % (name))
shape_all = pd.concat(shape_all, axis=0).reset_index(drop=True)
PrintAdminUnits(shape_all)
# ----------------------------------------------- #

# FDW API host address -------------------------- #
host = 'https://fdw.fews.net'
auth = tuple(json.loads(open('token.json', "r").read()))
parameters = {
    'format': 'json',
    'country': 'Uganda',
    'product': ['R011','R012','R013','R014','R015','R017','R018'],
    'survey_type': 'crop:best'
}
endpoint = '/api/cropproductionindicatorvalue/'
response = requests.get(host + endpoint, auth=auth, params=parameters, proxies={})
response.raise_for_status()
df = pd.DataFrame.from_records(response.json())
print(df.groupby(['status','collection_status','data_usage_policy']).size().reset_index(name='count'))
# ----------------------------------------------- #

- FEWS NET admin shapefiles ------------------- #
| year	 | Admin1   | # units   | Admin2   | # units   | Admin3   | # units   |
| 1990	 | UG1990A1 | 4	| UG1990A2	| 34	| nan	| 0	|
| 1991	 | UG1991A1 | 4	| UG1991A2	| 38	| nan	| 0	|
| 1994	 | UG1994A1 | 4	| UG1994A2	| 39	| nan	| 0	|
| 1997	 | UG1997A1 | 4	| UG1997A2	| 45	| nan	| 0	|
| 2001	 | UG2001A1 | 4	| UG2001A2	| 56	| nan	| 0	|
| 2005	 | UG2005A1 | 4	| UG2005A2	| 69	| nan	| 0	|
| 2007	 | UG2007A1 | 4	| UG2007A2	| 80	| nan	| 0	|
| 2010	 | UG2010A1 | 4	| UG2010A2	| 112	| nan	| 0	|
| 2014	 | UG2014A1 | 4	| UG2014A2	| 112	| nan	| 0	|
| 2021	 | UG2021A1 | 4	| UG2021A2	| 135	| nan	| 0	|
----------------------------------------------- #
          status collection_status data_usage_policy  count
0      Collected         Published            Public   6799
1  Not Collected         Published            Public   2725


In [3]:
df['season_year'].unique()

array(['First harvest 2009', 'Second harvest 2008', 'Annual harvest 2008',
       'Annual harvest 2009', 'Second harvest 1991',
       'Annual harvest 1991', 'First harvest 1990'], dtype=object)

In [4]:
df['fnid'].unique()

array(['UG2007A20106', 'UG2007A20108', 'UG2007A20109', 'UG2007A20112',
       'UG2007A20114', 'UG2007A20116', 'UG2007A20118', 'UG2007A20120',
       'UG2007A20121', 'UG2007A20122', 'UG2007A20123', 'UG2007A20124',
       'UG2007A20125', 'UG2007A20126', 'UG2007A20127', 'UG2007A20128',
       'UG2007A20201', 'UG2007A20202', 'UG2007A20203', 'UG2007A20205',
       'UG2007A20206', 'UG2007A20208', 'UG2007A20209', 'UG2007A20212',
       'UG2007A20213', 'UG2007A20214', 'UG2007A20217', 'UG2007A20222',
       'UG2007A20223', 'UG2007A20224', 'UG2007A20226', 'UG2007A20232',
       'UG2007A20233', 'UG2007A20234', 'UG2007A20235', 'UG2007A20236',
       'UG2007A20237', 'UG2007A20238', 'UG2007A20239', 'UG2007A20240',
       'UG2007A20301', 'UG2007A20302', 'UG2007A20305', 'UG2007A20309',
       'UG2007A20310', 'UG2007A20311', 'UG2007A20312', 'UG2007A20314',
       'UG2007A20316', 'UG2007A20321', 'UG2007A20327', 'UG2007A20329',
       'UG2007A20331', 'UG2007A20332', 'UG2007A20333', 'UG2007A20334',
      

In [5]:


# Manual Pre-processing before Sweeping --------- #
# 1. Default setting 
# a) None-type population group
df.loc[df['population_group'].isna(), 'population_group'] = 'none'
df.loc[df['population_group'] == '', 'population_group'] = 'none'
# ----------------------------------------------- #

# FDW Production Data Inspection ---------------- #
df, df_raw = FDW_PD_Sweeper(df)
table_dict = FDW_PD_AvalTable(df, shape_all)
# ----------------------------------------------- #

# FEWS NET Shapefile comparison ----------------- #
shape_used = pd.concat([UG_Admin2_1990, UG_Admin2_2007], axis=0)
PlotAdminShapes(shape_used, label=True)
# ----------------------------------------------- #

- Remove missing records ---------------------- #
Orignial data points: 9,524
Removed 2,725 "Missing Value" points
0/2,381 "Area Harvested" points are retained.
2,345/2,381 "Area Planted" points are retained.
2,238/2,381 "Quantity Produced" points are retained.
2,216/2,381 "Yield" points are retained.
Current data points: 6,799

- Minor changes are applied ------------------- #

- Basic information --------------------------- #
Data period: 1990 - 2009
16 grain types are found: Banana (unspecified), Beans (mixed), Cassava, Cooking Banana (unspecified), Cowpeas (Mixed), Field Peas, Groundnuts (In Shell), Maize Grain (White), Millet (Finger), Pigeon Peas, Potato (Irish), Rice (Paddy), Sesame Seed, Sorghum, Soybean (unspecified), Sweet Potatoes
3 seasons are found: First harvest (01-01), Second harvest (07-01), Annual harvest (01-01)


TypeError: sequence item 0: expected str instance, NoneType found

- Uganda crop seasonal calendar

![FEWS NET](https://fews.net/sites/default/files/styles/large/public/seasonal-calendar-uganda.png?itok=ka9niXoM)

- FDW data consists of `UG1990A2` and `UG2007A2`.

| Year  | Admin-1  | # units  | Admin-2  | # units |
| :---: | :----:   | :----:   | :----:   | :---:   |
| 1990  | UG1990A1 | 4        | **`UG1990A2`** | 34      |
| 1991  | UG1991A1 | 4        | UG1991A2 | 38      |
| 1994  | UG1994A1 | 4        | UG1994A2 | 39      |
| 1997  | UG1997A1 | 4        | UG1997A2 | 45      |
| 2001  | UG2001A1 | 4        | UG2001A2 | 56      |
| 2005  | UG2005A1 | 4        | UG2005A2 | 69      |
| 2007  | UG2007A1 | 4        | **`UG2007A2`** | 80      |
| 2010  | UG2010A1 | 4        | UG2010A2 | 112     |
| 2014  | UG2014A1 | 4        | UG2014A2 | 112     |
| 2021  | UG2021A1 | 4        | UG2021A2 | 135     |

- Comparison between admin boundaries.

![image](https://github.com/chc-ucsb/gscd/blob/main/figures/UG_admin_shapes.png?raw=true)

- **FDW data consists of only 5 years of records**.
- **`UG2007A2`** is used to represent the current admin-level 2 crop data.
- Uganda has three crop seasons: `Annual harvest`,`First harvest` and `Second harvest`.
- Uganda has no crop production system.
- Uganda has no population group.

In [None]:
# Define the latest shapefile ------------------- #
latest_level = 2
shape_latest = UG_Admin2_2007.copy().to_crs('epsg:4326')
# ----------------------------------------------- #

# # Validation of FNIDs and Names ----------------- #
# df = FDW_PD_ValidateFnidName(df, shape_used, shape_latest)
# # ----------------------------------------------- #

# # FDW Production Data Compiling ----------------- #
# area, prod = FDW_PD_Compiling(df, shape_used)
# area_all, prod_all = area.copy(), prod.copy()
# mdx_pss = area.columns.droplevel([0,1]).unique()
# # ----------------------------------------------- #