In [1]:
import os
import pandas as pd
import subprocess
from jinja2 import Template
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.notebook import tqdm

In [2]:
# Jinja2 keyword-file template that run_fvs() reads and renders for each stand.
KEYFILE_TEMPLATE = "../models/Base_FIA.key"
# Directory where the rendered per-stand keyword files are written.
KEYFILE_OUTDIR = "../data/interim/usfia/keyfiles"
# Directory used to build the per-stand output database path ("FVSOut" in the template).
FVS_OUTDIR = "../data/interim/usfia/fvs_outputs"
# SQLite database passed to the template as "FVSIn" and queried for the stand list.
FVSIN_DB = "../data/raw/SQLite_FIADB_ENTIRE.db"

In [3]:
# Make sure both output directories exist before any keyfile or FVS output is written.
for out_dir in (KEYFILE_OUTDIR, FVS_OUTDIR):
    os.makedirs(out_dir, exist_ok=True)

In [4]:
def run_fvs(stand_id, fvs_variant, overwrite=False):
    """Render a keyword file for one stand and run the matching FVS executable.

    The Jinja2 template at ``KEYFILE_TEMPLATE`` is rendered with the input
    database path (``FVSIn``), a per-stand output database path (``FVSOut``)
    and the ``stand_id``; the result is written to
    ``KEYFILE_OUTDIR/<fvs_variant>_<stand_id>.key``.  The executable
    ``/usr/local/bin/FVS<variant in lower case>`` is then invoked with
    ``--keywordfile=<that path>``, capturing stdout and stderr.

    Parameters
    ----------
    stand_id
        Stand identifier; substituted into the template and used in the
        keyfile and output database file names.
    fvs_variant : str
        Variant code; used as-is in file names and lower-cased to pick the
        executable.
    overwrite : bool, default False
        Accepted but not consulted by this function: the keyfile is always
        rewritten and FVS is always run.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the FVS invocation; ``args[1]`` carries the keyfile path.
    """
    with open(KEYFILE_TEMPLATE, "r") as template_file:
        keyfile_template = Template(template_file.read())

    # Keyfile and output database share the "<variant>_<stand>" stem.
    run_tag = f"{fvs_variant}_{stand_id}"
    keyfile_path = os.path.join(os.path.abspath(KEYFILE_OUTDIR), f"{run_tag}.key")
    db_path = os.path.join(os.path.abspath(FVS_OUTDIR), f"{run_tag}.db")

    context = dict(
        FVSIn=os.path.abspath(FVSIN_DB),
        FVSOut=db_path,
        stand_id=stand_id,
    )

    with open(keyfile_path, "w") as keyfile:
        keyfile.write(keyfile_template.render(**context))

    command = [
        f"/usr/local/bin/FVS{fvs_variant.lower()}",
        f"--keywordfile={keyfile_path}",
    ]
    return subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

In [5]:
# Select the stands to simulate: STAND_ID and VARIANT from FVS_STANDINIT_COND,
# joined to COND on STAND_CN = CN, keeping only rows that pass the
# disturbance-code, treatment-code, condition-status and condition-proportion
# filters below and that have a VARIANT assigned.
# NOTE(review): in SQL, `x NOT IN (...)` evaluates to NULL when x is NULL, so a
# row with a NULL DSTRBCD*/TRTCD* value is dropped by these filters — confirm
# whether those columns can be NULL here and whether excluding them is intended.
SQL = """
SELECT s.STAND_ID, s.VARIANT
FROM FVS_STANDINIT_COND s
INNER JOIN COND c
ON s.STAND_CN = c.CN
WHERE (c.DSTRBCD1 NOT IN (30, 31, 32, 80, 90, 91, 92, 93, 94, 95))
AND (c.DSTRBCD2 NOT IN (30, 31, 32, 80, 90, 91, 92, 93, 94, 95))
AND (c.DSTRBCD3 NOT IN (30, 31, 32, 80, 90, 91, 92, 93, 94, 95))
AND (c.TRTCD1 NOT IN (10, 20, 30, 50))
AND (c.TRTCD2 NOT IN (10, 20, 30, 50))
AND (c.TRTCD3 NOT IN (10, 20, 30, 50))
AND (c.COND_STATUS_CD = 1)
AND (c.CONDPROP_UNADJ > 0.20)
AND s.VARIANT NOT NULL
"""

# The connection is given as a URL string built from the absolute path of the
# input database (pandas resolves string connections through SQLAlchemy).
stands = pd.read_sql(SQL, f"sqlite:///{os.path.abspath(FVSIN_DB)}")

# Summarise row count, non-null counts and dtypes of the selection.
stands.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400221 entries, 0 to 400220
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   STAND_ID  400221 non-null  object
 1   VARIANT   400221 non-null  object
dtypes: object(2)
memory usage: 6.1+ MB


In [6]:
# Preview the first rows of the selected stands (STAND_ID, VARIANT).
stands.head()

Unnamed: 0,STAND_ID,VARIANT
0,00062008050803055960512,NC
1,00062008050803017899581,CA
2,00062008050806065548331,WS
3,00062016060605009737471,WS
4,00062016060603061600031,CA


In [7]:
# Run FVS for every selected stand on a thread pool and collect one status
# record (variant, stand id, keyfile name, process return code) per run.
results = []

with tqdm(total=len(stands)) as pbar:
    with ThreadPoolExecutor(48) as executor:
        # Remember which (variant, stand_id) each future was submitted with.
        # Recovering them by splitting the keyfile name on "_" and "." would
        # mis-parse any identifier that itself contains one of those
        # characters.  Iterating the two columns directly also avoids
        # building a Series per row, as DataFrame.iterrows() does.
        jobs = {
            executor.submit(run_fvs, stand_id, variant): (variant, stand_id)
            for stand_id, variant in zip(stands["STAND_ID"], stands["VARIANT"])
        }
        # Handle runs in completion order so the progress bar advances as
        # soon as any worker finishes.
        for job in as_completed(jobs):
            variant, stand_id = jobs[job]
            proc = job.result()
            # The keyfile name is taken from the command actually executed.
            keyfile = os.path.basename(proc.args[1].split("--keywordfile=")[-1])
            results.append((variant, stand_id, keyfile, proc.returncode))
            pbar.update()

result_df = pd.DataFrame(
    results, columns=["VARIANT", "STAND_ID", "KEYFILE", "RETURN_CODE"]
)

  0%|          | 0/400221 [00:00<?, ?it/s]

In [8]:
# Persist the per-stand run status table (with a header row, without the index).
result_df.to_csv("../models/usfia_run_status.csv", index=False, header=True)