# Mapping real data to Wofost input/output 

In [104]:
import sys, os.path
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", 250)

import pcse
from pcse.models import Wofost71_PP
from pcse.base import ParameterProvider
from pcse.db import NASAPowerWeatherDataProvider
from pcse.fileinput import YAMLCropDataProvider
# from pcse.util import WOFOST71SiteDataProvider, DummySoilDataProvider
from progressbar import printProgressBar
from pcse.fileinput import CABOFileReader
from pcse.engine import Engine

In [105]:
# ## Define location, crop and season
# latitude, longitude = 52.2, 5.0
# crop_name = 'wheat'
# variety_name = 'Winter_wheat_101'
# campaign_start_date = '2006-01-01'
# emergence_date = "2006-03-31"
# harvest_date = "2006-10-20"
# max_duration = 300

In [106]:
## Retrieve data from default param files

data_dir = os.path.join(os.getcwd(), 'default_data') # Rogerio's data
crop_file_name = "crop.cab"
soil_file_name = "soil.cab" # Must be a CABO file
site_file_name = "site.cab" # Must be a CABO file
agro_file_name = "agro.yaml"# Must be a YAML file
config_file_name = "WLP_NPK.conf" # Water-limited and nutrient-limited production simulation

In [107]:
## Retrieve crop data
# cropd = YAMLCropDataProvider() # pulls from https://github.com/ajwdewit/WOFOST_crop_parameters
# cropd.set_active_crop(crop_name, variety_name)
# cropd

In [116]:
soild = CABOFileReader(os.path.join(data_dir, soil_file_name))
sited = CABOFileReader(os.path.join(data_dir, site_file_name))
cropd = CABOFileReader(os.path.join(data_dir, crop_file_name))
agromanagement = yaml.load(open(os.path.join(data_dir, agro_file_name)), 
                           Loader=yaml.SafeLoader)['AgroManagement']
# agro_yaml = """
# - {start}:
#     CropCalendar:
#         crop_name: {cname}
#         variety_name: {vname}
#         crop_start_date: {startdate}
#         crop_start_type: emergence
#         crop_end_date: {enddate}
#         crop_end_type: harvest
#         max_duration: {maxdur}
#     TimedEvents: null
#     StateEvents: null
# """.format(cname=crop_name, vname=variety_name, 
#            start=campaign_start_date, startdate=emergence_date, 
#            enddate=harvest_date, maxdur=max_duration)
# agromanagement = yaml.safe_load(agro_yaml)
# print(agro_yaml)
config = os.path.join(data_dir, config_file_name)
# soild

Real soil data available only for the below variables:

- SMW :  soil moisture content at wilting point [cm3/cm3]
- SMFCF :  soil moisture content at field capacity [cm3/cm3]
- K0 : hydraulic conductivity of saturated soil [cm day-1]
- SOPE  : maximum percolation rate root zone[cm day-1]
- KSUB : maximum percolation rate subsoil [cm day-1]


In [117]:
soil_data_path = 'actual_data/soil/soils_locations.csv' #soils_1stDraft.csv'
soil_cols = ['SMW', 'SMFCF', 'K0', 'SOPE', 'KSUB', 'center']

In [118]:
df_soil = pd.read_csv(soil_data_path, usecols=soil_cols)
df_soil.head(10)

Unnamed: 0,SMW,SMFCF,K0,SOPE,KSUB,center
0,0.089795,0.189621,1.357097,148.25472,99.734993,"[-94.01250034395, 36.73749962910311]"
1,0.156155,0.264972,0.426985,130.503168,87.79304,"[-94.01250034395, 36.73749962910311]"
2,0.114223,0.219987,0.886029,138.696192,93.304711,"[-86.90416703905, 32.829166311403114]"
3,0.156155,0.264972,0.426985,130.503168,87.79304,"[-86.90416703905, 32.829166311403114]"
4,0.267157,0.39919,0.060149,74.43216,50.072544,"[-86.90416703905, 32.829166311403114]"
5,0.114223,0.219987,0.886029,138.696192,93.304711,"[-85.33750037865, 31.12916631820311]"
6,0.156155,0.264972,0.426985,130.503168,87.79304,"[-85.33750037865, 31.12916631820311]"
7,0.267157,0.39919,0.060149,74.43216,50.072544,"[-85.33750037865, 31.12916631820311]"
8,0.126229,0.233028,0.704491,136.050528,91.524901,"[-85.33750037865, 31.12916631820311]"
9,0.156155,0.264972,0.426985,130.503168,87.79304,"[-82.27083372425, 30.004166322703114]"


In [119]:
# Override soild
soil_row = df_soil.loc[0]
for col in soil_cols[:-1]:
    soild[col] = soil_row[col]

In [120]:
# Set latitude, longitude to the soil row's center
from ast import literal_eval
longitude, latitude = literal_eval(soil_row['center'])
latitude, longitude

(36.73749962910311, -94.01250034395)

In [121]:
# Run Wofost
# Retrieve Weather Data from NASA
wdp = NASAPowerWeatherDataProvider(latitude=latitude, longitude=longitude)
params = ParameterProvider(cropdata=cropd, sitedata=sited, soildata=soild)
wofost = Engine(params, wdp, agromanagement, config) #WLP_NPK

In [122]:
wofost.run_till_terminate()
r = wofost.get_summary_output()

WeatherDataProviderError: No weather data for 2018-02-08.

In [None]:
r

# Map Wofost output to actual yield_data

In [152]:
cols = ['County', 'Value', 'Year', 'State']
yield_data = pd.read_csv('actual_data/yield_usda/wheat_irrigated_country_annual.csv', usecols=cols)
yield_data.head()

Unnamed: 0,Year,State,County,Value
0,2007,CALIFORNIA,CONTRA COSTA,65.7
1,2007,CALIFORNIA,MONTEREY,105.0
2,2007,CALIFORNIA,OTHER (COMBINED) COUNTIES,95.0
3,2007,CALIFORNIA,SAN LUIS OBISPO,104.0
4,2007,CALIFORNIA,LASSEN,50.0


In [153]:
# Convert Actual Yield Data in bushels/acre to kg/ha
# 1 Bushels Per Acre to Kilograms Per Hectare = 67.2511 ## http://www.kylesconverter.com/area-density/bushels-per-acre-to-kilograms-per-hectare
conversion_rate = 67.2511 # for wheat: bu/acre to kg/ha
yield_data['Value'] = yield_data['Value'] * conversion_rate
yield_data[['Value']].head()

Unnamed: 0,Value
0,4418.39727
1,7061.3655
2,6388.8545
3,6994.1144
4,3362.555


In [154]:
## Get county coordinates
cols = ['county', 'latitude', 'longitude']
county_coords = pd.read_csv('actual_data/others/Geocodes_USA_with_Counties.csv', usecols=cols)
county_coords = county_coords.dropna()
county_coords = county_coords.drop_duplicates()
county_coords.head()

Unnamed: 0,latitude,longitude,county
0,40.81,-73.04,Suffolk
2,18.16,-66.72,Adjuntas
4,18.43,-67.15,Aguadilla
7,18.18,-66.98,Maricao
10,18.45,-66.73,Arecibo


In [158]:
yield_data['County'] = yield_data['County'].str.lower()
county_coords['county'] = county_coords['county'].str.lower()
clean_yield_data = yield_data.join(county_coords.set_index('county'), on='County', how='inner')
clean_yield_data = new_yield_data.drop_duplicates()
clean_yield_data.head()

Unnamed: 0,Year,State,County,Value,latitude,longitude
0,2007,CALIFORNIA,contra costa,4418.39727,37.86,-121.64
0,2007,CALIFORNIA,contra costa,4418.39727,37.78,-121.88
0,2007,CALIFORNIA,contra costa,4418.39727,37.84,-121.97
0,2007,CALIFORNIA,contra costa,4418.39727,37.99,-121.81
0,2007,CALIFORNIA,contra costa,4418.39727,38.07,-121.62


In [159]:
coords = new_yield_data[['latitude', 'longitude']]
coords.head()

Unnamed: 0,latitude,longitude
0,37.86,-121.64
0,37.78,-121.88
0,37.84,-121.97
0,37.99,-121.81
0,38.07,-121.62


# WOFOST Input/Output Matrix using real data
coords: latitude, longitue input

clean_yield_data: output in kg/ha
crop: constant
agro_yaml: constant

In [None]:
coords