In [1]:
import os
import pcse
import yaml
import h5py
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
from multiprocessing import Pool
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import seaborn as sns
from pcse.models import Wofost71_PP, Wofost71_WLP_FD
from pcse.base import ParameterProvider
from pcse.db import NASAPowerWeatherDataProvider
from pcse import fileinput as fi
from pcse.util import WOFOST71SiteDataProvider

In [2]:
# Agromanagement template (YAML text) describing one maize campaign.
# The {placeholders} (sim_start_date, crop_start_date, crop_end_date,
# fert_date, n_rate) are filled in with str.format() inside run() before the
# text is parsed with yaml.load. The doubled braces in the events table are
# str.format escapes, so they come out as single literal braces.
# NOTE(review): the template schedules an `apply_n` timed event; confirm that
# the model instantiated in run() actually consumes that signal.
agro_yaml = """
Version: 1.0
AgroManagement:
- {sim_start_date}:
    CropCalendar:
        crop_name: maize
        variety_name: Maize_VanHeemst_1988
        crop_start_type: sowing
        crop_start_date: {crop_start_date}
        crop_end_type: earliest
        crop_end_date: {crop_end_date}
        max_duration: 300
    TimedEvents: 
    -   event_signal: apply_n
        name:  Nitrogen application table
        comment: All nitrogen amounts in g N m-2
        events_table:
        - {fert_date}: {{amount: {n_rate}, recovery: 0.7}}
    StateEvents: null
"""

In [3]:
# Soil parameters, hard-coded and shared by every simulation in this notebook.
soildata = dict(
    SMFCF=0.2,
    SMW=0.1,
    CRAIRC=0.1,
    KSUB=20.0,
    RDMSOL=200.0,
    K0=100.0,
    SOPE=10.0,
    SM0=0.4,
)

# Crop parameters: activate the same crop/variety pair that agro_yaml names.
cropdata = fi.YAMLCropDataProvider()
cropdata.set_active_crop('maize', 'Maize_VanHeemst_1988')

# Site parameters.
sitedata = WOFOST71SiteDataProvider(WAV=100, CO2=400)

# Single parameter object handed to the model in run().
parameters = ParameterProvider(
    soildata=soildata,
    cropdata=cropdata,
    sitedata=sitedata,
)

In [10]:
def run(wfile):
    """Run one WOFOST simulation for a single weather file and return its last output row.

    The sowing date is taken from the file name: after the ``.csv`` suffix is
    removed, the slice ``[18:26]`` of the base name must hold a ``YYYYMMDD``
    date. The simulation starts 90 days before sowing, may run until 210 days
    after sowing, and schedules one nitrogen event 30 days after sowing.

    Parameters
    ----------
    wfile : str
        Path of a CSV weather file readable by ``fi.CSVWeatherDataProvider``.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        On success, the last row of the simulation output (indexed by ``day``)
        with the extra fields ``SIM`` (file name without ``.csv``) and
        ``SIM_DATE`` (sowing date). On failure, an empty ``DataFrame``, so
        callers can detect a failed run with ``len(result) == 0``.

    Notes
    -----
    Only ``Exception`` subclasses are turned into the empty result, and each
    failure is reported through ``warnings.warn``. ``KeyboardInterrupt`` and
    ``SystemExit`` propagate so a long batch can still be interrupted.
    """
    import warnings

    try:
        site = os.path.basename(wfile).replace('.csv', '')
        wdp = fi.CSVWeatherDataProvider(wfile)

        # Derive every campaign date from the sowing date encoded in the file name.
        crop_start_date = pd.to_datetime(site[18:26], format='%Y%m%d').date()
        sim_start_date = crop_start_date - dt.timedelta(days=90)
        crop_end_date = crop_start_date + dt.timedelta(days=210)
        fert_date = crop_start_date + dt.timedelta(days=30)
        n_rate = 100

        # Fill the agromanagement template and parse it. The YAML text is
        # generated locally from agro_yaml, not read from an external source.
        tmp = agro_yaml.format(sim_start_date=sim_start_date,
                               crop_start_date=crop_start_date,
                               crop_end_date=crop_end_date,
                               fert_date=fert_date,
                               n_rate=n_rate)
        agromanagement = yaml.load(tmp, yaml.FullLoader)

        wofsim = Wofost71_WLP_FD(parameters, wdp, agromanagement)
        wofsim.run_till_terminate()
        output = wofsim.get_output()
        df = pd.DataFrame(output).set_index("day")
        df['SIM'] = site
        df['SIM_DATE'] = crop_start_date
        # Only the final simulated day is kept as the summary of the run.
        return df.iloc[-1]
    except Exception as err:
        # Best effort: one bad file must not abort the batch, but the reason
        # for the failure should not be lost either.
        warnings.warn(f'WOFOST run failed for {wfile}: {err!r}', RuntimeWarning)
        return pd.DataFrame()


In [11]:
wdir = '../../../Apsim_test/MASAGRO/DAYMET_TILE'

train_methods = ['none', 'rnd', 'adv']

# Collect the weather files of every (replicate, training method, percentage)
# combination, matched by file-name suffix. The directory listing is sorted
# once, so the files of each combination stay in name order.
all_files = sorted(os.listdir(wdir))
wfiles_l = []
for REP in range(1,3):
    for train_method in train_methods:
        for PCT in [1,5]:
            fp = f'_opt_{train_method}_{PCT}_{REP}.csv'
            # extend() with a generator replaces a list comprehension that was
            # executed only for its append() side effect.
            wfiles_l.extend(os.path.join(wdir, f) for f in all_files if f.endswith(fp))
wfiles = wfiles_l
print(len(wfiles))

38400


In [12]:
# Load the first weather file as a plain table for a quick look, skipping the
# first 13 lines (presumably the metadata header of the weather file - TODO confirm).
first_weather_file = wfiles[0]
w = pd.read_csv(first_weather_file, skiprows=13)

In [13]:
# Run a single simulation on the first weather file (presumably a smoke test
# of run() before the full batch).
df_test = run(wfiles[0])

In [25]:
# Run every simulation in parallel and cache one summary row per weather file.
# The batch is expensive, so it only runs when the cache file is missing;
# delete the file to force a fresh run. When the cache exists this cell does
# nothing and the results are read back from disk further down.
sim_cache = '../data/PSCE_TILE_opt.h5'
if __name__ == '__main__' and not os.path.exists(sim_cache):
    with Pool(processes=30) as p:
        # imap() yields results in input order, so each result can be paired
        # with the weather file that produced it.
        output = list(tqdm(p.imap(run, wfiles), total=len(wfiles)))

    outputf = []
    n_failed = 0
    for wfile, o in zip(wfiles, output):
        if len(o) > 0:
            outputf.append(o)
        else:
            # run() returns an empty result on failure. Keep a placeholder row
            # that carries the failed file's own SIM id (all other fields
            # become NaN) instead of copying an unrelated simulation, so rows
            # still line up after sorting by SIM.
            n_failed += 1
            outputf.append(pd.Series({'SIM': os.path.basename(wfile).replace('.csv', '')}))
    print(f'{n_failed} of {len(wfiles)} simulations failed')

    output_df = pd.DataFrame(outputf)
    output_df.to_hdf(sim_cache, key = 'SIM')

In [21]:
# Read the cached simulation results back and order them by simulation id.
output_df = (
    pd.read_hdf('../data/PSCE_TILE_opt.h5', key='SIM')
    .sort_values('SIM')
)

In [None]:
# Stack the prediction arrays of every (train_method, train_pct) combination
# into one long table, then attach the simulation results by row position.
pred_frames = []
for train_method in train_methods:
    for train_pct in [1,5]:
        preds = np.load(f'../data/y_pred_opt_{train_method}_0{train_pct}.npy')
        frame = pd.DataFrame(preds, columns=['Yield0', 'Yield_adv']).assign(
            train_method=train_method,
            train_pct=train_pct,
            id=np.arange(len(preds)),
        )
        pred_frames.append(frame)

yy = pd.concat(pred_frames).sort_values(['id', 'train_method', 'train_pct'])
# Positional assignment: this relies on output_df (sorted by SIM) being in the
# same row order as yy - NOTE(review): confirm the file naming guarantees it.
yy['SIM'] = output_df.SIM.values
yy['Yield'] = output_df.TWSO.values

In [None]:
# Display the combined table of predictions and simulated yields.
yy

In [None]:
# Overlay the distributions of the simulated yield (Yield) and the yield loaded
# from the y_pred files (Yield_adv). Transparency and a legend keep both
# histograms visible and distinguishable on the shared axes.
fig, ax = plt.subplots()
yy.Yield.hist(ax=ax, alpha=0.6, label='Yield')
yy.Yield_adv.hist(ax=ax, alpha=0.6, label='Yield_adv')
ax.legend()

In [None]:
# Difference between the simulated yield and the Yield_adv value, per row.
yy['Yield_diff'] = yy['Yield'] - yy['Yield_adv']

In [None]:
# Mean yield difference for each training method.
yy.groupby('train_method')['Yield_diff'].mean()

In [None]:
from sklearn.metrics import r2_score, mean_squared_error

In [None]:
def rmse( g ):
    """Return the root-mean-square error between ``Yield`` and ``Yield_adv`` of a group.

    ``g`` is a DataFrame (typically one groupby group) holding both columns.
    The result is a one-element Series labelled ``rmse``, so that
    ``groupby(...).apply(rmse)`` produces a column of that name.
    """
    mse = mean_squared_error( g['Yield'], g['Yield_adv'] )
    return pd.Series( {'rmse': np.sqrt( mse )} )

# RMSE for every (train_method, train_pct) combination, shown as a flat table.
rmse_by_group = yy.groupby(['train_method', 'train_pct']).apply( rmse )
rmse_by_group.reset_index()

In [None]:
# Overlay the Yield_diff distribution of each training method on one figure.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; it is
# kept here because the installed seaborn version is not visible in this file.
plt.subplots(figsize=(7,6), dpi=100)
for method_key, colour, legend_label in [
    ('none', 'dodgerblue', 'None'),
    ('rnd', 'orange', 'Random'),
    ('adv', 'deeppink', 'Adversarial'),
]:
    sns.distplot( yy.loc[yy.train_method==method_key, "Yield_diff"] , color=colour, label=legend_label)

plt.legend()

In [None]:
# Simulated yield against Yield_adv for the rows with train_pct == 1, coloured
# by training method. x/y/hue are passed by keyword because newer seaborn
# releases no longer accept them positionally.
sns.scatterplot(x='Yield_adv', y='Yield', hue='train_method', data = yy.loc[yy.train_pct == 1])

In [None]:
# Simulated yield against Yield_adv for the rows with train_pct == 5, coloured
# by training method. x/y/hue are passed by keyword because newer seaborn
# releases no longer accept them positionally.
sns.scatterplot(x='Yield_adv', y='Yield', hue='train_method', data = yy.loc[yy.train_pct == 5])