In [33]:
import numpy as np
import os
import pandas as pd
from jinja2 import Template
import subprocess
#from matplotlib import pyplot as plt
#from multiprocessing import Pool
import multiprocessing
import sqlite3
import glob
import openpyxl

from functools import partial

In [35]:
# Print numpy floats with 8 digits of precision and suppress scientific notation for small values.
np.set_printoptions(precision=8, suppress=True)

In [65]:
# Example parameter vector, in the positional order read by make_param_dict:
# species code, five FIXDG multipliers, five MORT multipliers, then SDI max.
params = [202,   # fia species code
      1,   # 'FIXDGm_0'
      1,   # 'FIXDGm_1'
      1,   # 'FIXDGm_2'
      1,   # 'FIXDGm_3'
      1,   # 'FIXDGm_4'
      1,   # 'MORTm_0'
      1,   # 'MORTm_1'
      1,   # 'MORTm_2'
      1,   # 'MORTm_3'
      1,   # 'MORTm_4'
      600,   # 'SDI_MAX'
] 
# Stand id used by the single-stand example cells in this notebook.
stand_id = '00062008050801015519691'

In [4]:
def make_param_dict(params, stand_id):
    """Assemble the values substituted into a single-species keyfile.

    Multipliers carried by ``params``:
      * FIXDG    - diameter growth multiplier by species and diameter class
      * MORTMULT - background mortality rate multiplier by species and diameter class
      * SDIMAX   - density-driven mortality parameter set for each species

    Parameters
    ----------
    params : sequence
      positions 0-11 are read: species code, five growth multipliers,
      five mortality multipliers, SDI max
    stand_id : str
      stand identifier; also used to name the output database

    Returns
    -------
    dict
      keyfile values keyed by name
    """
    growth_keys = ('FIXDGm_0', 'FIXDGm_1', 'FIXDGm_2', 'FIXDGm_3', 'FIXDGm_4')
    mort_keys = ('MORTm_0', 'MORTm_1', 'MORTm_2', 'MORTm_3', 'MORTm_4')

    # stand input, then the input and output databases
    settings = {
        'stand_id': stand_id,
        'FVSIn': '../data/FVSIn.db',
        'FVSOut': '../keyfiles/{}.db'.format(stand_id),
    }

    species = int(params[0])

    # growth factors, one per size class: 3-5, 5-10, 10-15, 15-20, >20
    settings['FIXDGm_sp'] = species
    for position, key in enumerate(growth_keys, start=1):
        settings[key] = round(params[position], 8)

    # background mortality factors, same size classes
    settings['MORTm_sp'] = species
    for position, key in enumerate(mort_keys, start=6):
        settings[key] = round(params[position], 8)

    # density driven mortality factors
    settings.update(SDI_MAX=int(params[11]), SDI_LB=55, SDI_UB=85)

    return settings


In [5]:
# Build the keyfile values for the example stand and display them.
param_dict = make_param_dict(params, stand_id)
param_dict

{'stand_id': '00062008050801015519691',
 'FVSIn': '../data/FVSIn.db',
 'FVSOut': '../keyfiles/00062008050801015519691.db',
 'FIXDGm_sp': 202,
 'FIXDGm_0': 1,
 'FIXDGm_1': 1,
 'FIXDGm_2': 1,
 'FIXDGm_3': 1,
 'FIXDGm_4': 1,
 'MORTm_sp': 202,
 'MORTm_0': 1,
 'MORTm_1': 1,
 'MORTm_2': 1,
 'MORTm_3': 1,
 'MORTm_4': 1,
 'SDI_MAX': 600,
 'SDI_LB': 55,
 'SDI_UB': 85}

In [78]:
def get_keyfile_template(path_to_template):
    """Read the base keyfile at ``path_to_template`` and wrap its text in a ``Template``.

    Parameters
    ----------
    path_to_template : str
      path to the base keyfile

    Returns
    -------
    Template
      template built from the file's full text
    """
    with open(path_to_template, 'r') as source:
        keyfile_text = source.read()
    return Template(keyfile_text)

def make_keyfile(template, param_dict):
    """Render ``template`` with ``param_dict`` and write it to the stand's keyfile.

    The file is written to ``../keyfiles/<stand_id>.key`` (relative to the
    current working directory), where ``<stand_id>`` is ``param_dict['stand_id']``.

    Parameters
    ----------
    template : object with a ``render(**kwargs)`` method
      keyfile template
    param_dict : dict
      values passed to ``render`` as keyword arguments; must contain 'stand_id'

    Returns
    -------
    str
      absolute path of the keyfile that was written
    """
    destination = os.path.abspath(
        '../keyfiles/{}.key'.format(param_dict['stand_id'])
    )
    with open(destination, 'w') as handle:
        rendered = template.render(**param_dict)
        handle.write(rendered)

    return destination

def run_fvs(params, stand_id):
    """Write a keyfile for one stand, run FVS on it, and remove the scratch files.

    Parameters
    ----------
    params : sequence
      parameter vector in the order expected by ``make_param_dict``
    stand_id : str
      stand to simulate

    Returns
    -------
    int
      value returned by ``subprocess.call`` (the exit status of the FVS process)

    Notes
    -----
    The keyfile template (``../models/Base_Rx.key``), the FVS executable
    (``/usr/local/bin/FVSnc``) and the ``../keyfiles`` directory are hard-coded
    and resolved against the current working directory.

    NOTE(review): ``../keyfiles/<stand_id>.db`` is not removed here. If FVS
    appends to an existing output database, a later read of that database may
    see rows from earlier runs -- confirm.
    """
    param_dict = make_param_dict(params, stand_id)
    KEYFILE_TEMPLATE = '../models/Base_Rx.key'
    template = get_keyfile_template(KEYFILE_TEMPLATE)
    keyfile = make_keyfile(template, param_dict)

    try:
        # Discard FVS console output. subprocess.call never reads from a PIPE,
        # so a child that prints enough to fill the pipe buffer would block forever.
        return_code = subprocess.call(['/usr/local/bin/FVSnc',
                                       '--keywordfile={}'.format(keyfile)],
                                      stderr=subprocess.DEVNULL,
                                      stdout=subprocess.DEVNULL)
    finally:
        # cleanup output files, even when the run itself raised
        for extension in ('trl', 'out', 'key'):
            try:
                os.remove('../keyfiles/{}.{}'.format(stand_id, extension))
            except FileNotFoundError:
                # A failed run may not have produced this file; a missing file
                # must not prevent the remaining ones from being removed.
                pass

    return return_code

In [22]:
# Single FVS run for the example stand; `proc` holds the value returned by subprocess.call.
proc = run_fvs(params, stand_id)

In [38]:
def get_groundtruth(stand_id):
    """Look up the observed record for one stand in the ground-truth workbook.

    The workbook ``../data/groundtruth.xlsx`` is read on every call and indexed
    by its 'stand_id' column.

    Parameters
    ----------
    stand_id : str
      index label of the stand to look up

    Returns
    -------
    pandas object
      result of ``.loc[stand_id]`` on the indexed table
    """
    workbook_path = '../data/groundtruth.xlsx'
    observed = pd.read_excel(workbook_path)
    by_stand = observed.set_index('stand_id')

    return by_stand.loc[stand_id]

In [39]:
# Ground-truth record for the example stand.
gt = get_groundtruth(stand_id)
gt

STAND_ID2    8.010155e+11
growth       1.159811e+02
mort        -2.195334e+01
acc          9.402778e+01
Name: 00062008050801015519691, dtype: float64

In [41]:
def get_run_data(stand_id):
    """Read the ``fvs_summary`` table from a stand's FVS output database.

    Parameters
    ----------
    stand_id : str
      stand whose database ``../keyfiles/<stand_id>.db`` is read

    Returns
    -------
    pandas.DataFrame
      every row and column of ``fvs_summary``

    Notes
    -----
    ``sqlite3.connect`` creates an empty database when the file does not exist,
    so a missing output database shows up as a failed query on ``fvs_summary``
    rather than as a missing-file error.
    """
    connection = sqlite3.connect('../keyfiles/{}.db'.format(stand_id))
    try:
        run_data = pd.read_sql_query("SELECT * from fvs_summary", connection)
    finally:
        # Close explicitly so repeated calls do not accumulate open database handles.
        connection.close()

    return run_data

In [42]:
# Summary table read from the example stand's output database.
rd = get_run_data(stand_id)
rd

Unnamed: 0,CaseID,StandID,Year,Age,Tpa,BA,SDI,CCF,TopHt,QMD,...,ATCCF,ATTopHt,ATQMD,PrdLen,Acc,Mort,MAI,ForTyp,SizeCls,StkCls
0,63923a2b-2d11-4123-b73f-487890c39d69,00062008050801015519691,2008,450,563,359,509,427,118,10.820225,...,427,118,10.820225,10,88,305,23.404444,201,1,1
1,63923a2b-2d11-4123-b73f-487890c39d69,00062008050801015519691,2018,460,267,285,373,316,115,13.982214,...,316,115,13.982214,0,0,0,19.91087,201,1,1


In [58]:
def get_residuals(stand_id):
    """Return predicted minus observed 'Acc' and 'Mort' for one stand.

    The prediction is the first row of the stand's FVS summary table; the
    observation is the stand's ground-truth record.

    Parameters
    ----------
    stand_id : str
      stand to evaluate

    Returns
    -------
    pandas.DataFrame
      one row with columns 'Acc' and 'Mort'

    Notes
    -----
    NOTE(review): the ground-truth output displayed elsewhere in this notebook
    shows lowercase labels ('acc', 'mort'); confirm the workbook columns match
    the names used here.
    NOTE(review): that same output shows a negative 'mort' while the FVS summary
    'Mort' is positive; confirm both use the same sign convention before trusting
    the mortality residual.
    """
    metric_columns = ['Acc', 'Mort']

    # prediction: first summary row only
    summary = get_run_data(stand_id)
    predicted = summary[metric_columns].head(1)

    # observation: same metrics from the ground-truth record
    truth = get_groundtruth(stand_id)
    observed = truth[metric_columns]

    return predicted - observed

In [59]:
# Predicted minus observed Acc and Mort for the example stand.
res = get_residuals(stand_id)
res

Unnamed: 0,Acc,Mort
0,-6.027783,326.953338


In [75]:
def run_score_batch(params, all_stands, sample_size=5, num_cores=16, target='both'):
    """
    Objective function to be optimized by PSO.

    Draws a random batch of stands, runs FVS on each in parallel, and scores the
    batch by the sum of squared residuals.

    Parameters
    ----------
    params : array
      parameters being tested in this step of the PSO
    all_stands : array
      stand ids from which this step's batch is drawn
    sample_size : int
      number of stands simulated in this step; stands are drawn without
      replacement, so at most the number of distinct ids in all_stands is used
    num_cores : int
      number of cores that will be used for parallel processing
    target : str
      one of 'growth', 'mortality', or 'both'; any other value is treated
      as 'both'

    Returns
    -------
    obj_fun : scalar
      score of objective function on this batch of simulations: the sum of
      squared 'Acc' residuals, 'Mort' residuals, or both
    """
    # Every stand writes and deletes files named after its id under ../keyfiles,
    # so a batch must not contain the same stand twice: two workers would race
    # on the same keyfile and output files. Draw distinct ids only.
    candidates = np.unique(all_stands)
    batch_size = min(sample_size, len(candidates))
    stand_ids = np.random.choice(candidates, batch_size, replace=False)

    map_to_run = partial(run_fvs, params)
    with multiprocessing.Pool(num_cores) as p:
        # All simulations finish before any residual is read.
        # The exit statuses returned by run_fvs are not inspected.
        p.map(map_to_run, stand_ids)
        resids = p.map(get_residuals, stand_ids)

    resid = pd.concat(resids, axis=0, ignore_index=True)

    # Only the sum of squared errors feeds the objective.
    growth_sse = (resid['Acc']**2).sum()
    mort_sse = (resid['Mort']**2).sum()

    if target == 'growth':
        obj_fun = growth_sse
    elif target == 'mortality':
        obj_fun = mort_sse
    else:
        obj_fun = growth_sse + mort_sse

    return obj_fun

In [73]:
# Distinct stand ids in the ground-truth workbook; passed to run_score_batch as the sampling pool.
stand_data = pd.read_excel('../data/groundtruth.xlsx')
all_stands = np.unique(stand_data.stand_id)

In [76]:
# Score the example parameters on one random batch, using the default sample_size, num_cores and target.
test = run_score_batch(params, all_stands)

In [77]:
# Display the objective value stored in `test`.
test

437877.7364175144