In [1]:
import pandas as pd
import os
import numpy as np
from matplotlib import pyplot as plt

In [69]:
# Notebook inputs. The paths are relative, so they resolve against the kernel's
# working directory.
CALIB = "../data/processed/usfia_fvs_calibstats.csv"  # CSV read into `calib`
CASES = "../data/processed/usfia_fvs_cases.csv"  # CSV read into `cases`
FIA_DB = "../data/raw/SQLite_FIADB_ENTIRE.db"  # SQLite file queried via pd.read_sql
# Variant code intended for comparison with the VARIANT column. No active
# statement in the visible cells applies it, so all variants are analysed.
USE_VARIANT = "NC"

In [70]:
# Read the case table and upper-case its header in one step; the following
# cells address its columns by upper-case name (CASEID, STAND_CN, ...).
cases = pd.read_csv(CASES).rename(columns=str.upper)
cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388726 entries, 0 to 388725
Data columns (total 12 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   CASEID       388726 non-null  object 
 1   STAND_CN     388726 non-null  int64  
 2   STANDID      388726 non-null  object 
 3   MGMTID       388726 non-null  object 
 4   RUNTITLE     0 non-null       float64
 5   KEYWORDFILE  388726 non-null  object 
 6   SAMPLINGWT   388726 non-null  float64
 7   VARIANT      388726 non-null  object 
 8   VERSION      388726 non-null  object 
 9   RV           388726 non-null  int64  
 10  GROUPS       0 non-null       float64
 11  RUNDATETIME  388726 non-null  object 
dtypes: float64(3), int64(2), object(7)
memory usage: 35.6+ MB


In [71]:
# Every case and stand is kept. To restrict the analysis to one variant, filter
# `cases` on `cases.VARIANT == USE_VARIANT` before taking the unique values.
use_ids = cases["CASEID"].unique()
use_cns = cases["STAND_CN"].unique()

In [72]:
# Render the stand CNs as a comma-separated list of plain integer literals.
# Interpolating `tuple(use_cns)` straight into the query is fragile: a
# one-element tuple renders as "(123,)", which is not valid SQL, and NumPy >= 2
# renders integer scalars inside a tuple as "np.int64(123)".
# `int(...)` also guarantees that only numeric text reaches the SQL string.
cn_literals = ", ".join(str(int(cn)) for cn in use_cns)

# Stand-level covariates: rows of FVS_STANDINIT_COND joined to COND on the
# condition CN, limited to the stands present in `cases`.
SQL = f"""
SELECT s.STAND_ID as STANDID,
    s.STAND_CN,
    s.VARIANT,
    s.LOCATION,
    s.LATITUDE,
    s.LONGITUDE,
    c.SITECLCD,
    c.FORTYPCD
FROM FVS_STANDINIT_COND s
INNER JOIN COND c
ON s.STAND_CN = c.CN
WHERE s.STAND_CN IN ({cn_literals})
"""

covars = pd.read_sql(SQL, f"sqlite:///{os.path.abspath(FIA_DB)}")
covars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388726 entries, 0 to 388725
Data columns (total 8 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   STANDID    388726 non-null  object 
 1   STAND_CN   388726 non-null  object 
 2   VARIANT    388726 non-null  object 
 3   LOCATION   388726 non-null  int64  
 4   LATITUDE   388726 non-null  float64
 5   LONGITUDE  388726 non-null  float64
 6   SITECLCD   388726 non-null  float64
 7   FORTYPCD   388726 non-null  float64
dtypes: float64(4), int64(1), object(3)
memory usage: 23.7+ MB


In [73]:
# Load the whole REF_SPECIES table; a later cell uses its SPCD and COMMON_NAME
# columns to attach common names to the calibration records.
spp = pd.read_sql(
    sql="REF_SPECIES",
    con="sqlite:///" + os.path.abspath(FIA_DB),
)
spp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2677 entries, 0 to 2676
Data columns (total 82 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   CN                            2677 non-null   int64         
 1   SPCD                          2677 non-null   float64       
 2   COMMON_NAME                   2677 non-null   object        
 3   SHARED_COMMON_NAME_IND        2677 non-null   object        
 4   GENUS                         2677 non-null   object        
 5   SPECIES                       2677 non-null   object        
 6   VARIETY                       21 non-null     object        
 7   SUBSPECIES                    8 non-null      object        
 8   SPECIES_SYMBOL                2677 non-null   object        
 9   E_SPGRPCD                     2677 non-null   int64         
 10  W_SPGRPCD                     2677 non-null   int64         
 11  C_SPGRPCD                     

In [74]:
# Load the whole REF_FOREST_TYPE table from the FIA database.
fortyp = pd.read_sql(
    sql="REF_FOREST_TYPE",
    con="sqlite:///" + os.path.abspath(FIA_DB),
)
fortyp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   VALUE                 207 non-null    int64         
 1   MEANING               207 non-null    object        
 2   TYPGRPCD              207 non-null    int64         
 3   MANUAL_START          207 non-null    float64       
 4   MANUAL_END            16 non-null     float64       
 5   ALLOWED_IN_FIELD      207 non-null    object        
 6   CREATED_BY            207 non-null    object        
 7   CREATED_DATE          207 non-null    datetime64[ns]
 8   CREATED_IN_INSTANCE   207 non-null    object        
 9   MODIFIED_BY           180 non-null    object        
 10  MODIFIED_DATE         180 non-null    datetime64[ns]
 11  MODIFIED_IN_INSTANCE  180 non-null    object        
dtypes: datetime64[ns](2), float64(2), int64(2), object(6)
memory usage: 19.5+ KB


In [75]:
# Build the calibration table in a single chain:
#   1. read the CSV and upper-case its header,
#   2. keep only rows whose CASEID is in `use_ids`,
#   3. left-join the species common name (SPECIESFIA -> SPCD).
calib = (
    pd.read_csv(CALIB)
    .rename(columns=str.upper)
    .loc[lambda frame: frame["CASEID"].isin(use_ids)]
    .merge(
        spp[["SPCD", "COMMON_NAME"]],
        how="left",
        left_on="SPECIESFIA",
        right_on="SPCD",
    )
)
calib.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274924 entries, 0 to 274923
Data columns (total 13 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   CASEID         274924 non-null  object 
 1   STANDID        274924 non-null  object 
 2   TREESIZE       274924 non-null  object 
 3   SPECIESFVS     274924 non-null  object 
 4   SPECIESPLANTS  274924 non-null  object 
 5   SPECIESFIA     274924 non-null  int64  
 6   NUMTREES       274924 non-null  int64  
 7   SCALEFACTOR    274924 non-null  float64
 8   STDERRRATIO    245659 non-null  float64
 9   WEIGHTTOINPUT  245659 non-null  float64
 10  READCORMULT    274924 non-null  float64
 11  SPCD           274924 non-null  float64
 12  COMMON_NAME    274924 non-null  object 
dtypes: float64(5), int64(2), object(6)
memory usage: 27.3+ MB


In [76]:
# Attach the stand-level covariates to each calibration record. Both frames
# name the key STANDID, so a single `on=` replaces the left_on/right_on pair.
df = calib.merge(covars, how="left", on="STANDID")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274924 entries, 0 to 274923
Data columns (total 20 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   CASEID         274924 non-null  object 
 1   STANDID        274924 non-null  object 
 2   TREESIZE       274924 non-null  object 
 3   SPECIESFVS     274924 non-null  object 
 4   SPECIESPLANTS  274924 non-null  object 
 5   SPECIESFIA     274924 non-null  int64  
 6   NUMTREES       274924 non-null  int64  
 7   SCALEFACTOR    274924 non-null  float64
 8   STDERRRATIO    245659 non-null  float64
 9   WEIGHTTOINPUT  245659 non-null  float64
 10  READCORMULT    274924 non-null  float64
 11  SPCD           274924 non-null  float64
 12  COMMON_NAME    274924 non-null  object 
 13  STAND_CN       274924 non-null  object 
 14  VARIANT        274924 non-null  object 
 15  LOCATION       274924 non-null  int64  
 16  LATITUDE       274924 non-null  float64
 17  LONGITUDE      274924 non-nul

In [77]:
# Number of SCALEFACTOR records per species for the "LG" tree-size class,
# ordered from most to least frequent.
abund = (
    df.groupby(["TREESIZE", "COMMON_NAME"])["SCALEFACTOR"]
    .count()
    .xs("LG", level="TREESIZE")
    .sort_values(ascending=False)
)
abund

COMMON_NAME
red maple              22274
loblolly pine          18596
sugar maple            11706
white oak               9362
quaking aspen           8155
                       ...  
Brewer spruce              3
pumpkin ash                2
bigcone Douglas-fir        2
Fremont cottonwood         1
Chihuahuan pine            1
Name: SCALEFACTOR, Length: 208, dtype: int64

In [78]:
# Mean, median and record count of SCALEFACTOR per species for the "LG" class,
# with rows ordered like `abund`.
(
    df.groupby(["TREESIZE", "COMMON_NAME"])["SCALEFACTOR"]
    .agg(["mean", "median", "count"])
    .xs("LG", level="TREESIZE")
    .loc[abund.index]
)

Unnamed: 0_level_0,mean,median,count
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
red maple,1.053796,0.961644,22274
loblolly pine,1.406926,1.168922,18596
sugar maple,1.097416,0.992561,11706
white oak,1.004464,0.951590,9362
quaking aspen,1.063604,1.018803,8155
...,...,...,...
Brewer spruce,0.914909,0.861891,3
pumpkin ash,2.219711,2.219711,2
bigcone Douglas-fir,0.480316,0.480316,2
Fremont cottonwood,0.948565,0.948565,1


In [85]:
# For each variant, show the median "LG" SCALEFACTOR by SITECLCD for the ten
# species with the most "LG" SCALEFACTOR records in that variant.
# NOTE: the loop rebinds the notebook-level name `abund`. Once it finishes,
# `abund` holds the top-10 counts of the last variant processed, which is what
# any later cell indexing by `abund.index` will see.
for var in np.unique(df.VARIANT):
    print(var)
    in_variant = df.loc[df.VARIANT == var]
    scalefactors = in_variant.groupby(by=["TREESIZE", "COMMON_NAME", "SITECLCD"])[
        "SCALEFACTOR"
    ].median()
    abund = (
        in_variant.groupby(by=["TREESIZE", "COMMON_NAME"])["SCALEFACTOR"]
        .count()
        .xs("LG", level="TREESIZE")
        .nlargest(10)
    )
    pivot = (
        scalefactors.xs("LG", level="TREESIZE")
        .reset_index()
        .pivot_table(index="COMMON_NAME", columns=["SITECLCD"], values="SCALEFACTOR")
    )
    # Missing species/site combinations are rendered as "--".
    display(pivot.loc[abund.index].round(2).fillna("--"))

AK


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
western hemlock,3.39,2.67,1.1,1.42,1.1,1.06,1.02
mountain hemlock,--,--,1.63,--,1.22,1.05,1.22
Sitka spruce,2.75,2.13,1.49,1.33,1.3,1.07,0.92
Alaska yellow-cedar,--,--,--,--,1.29,1.45,1.48
western redcedar,--,--,4.76,0.98,1.4,1.1,1.0
white spruce,--,--,--,3.98,1.8,1.68,1.25
lodgepole pine,--,--,--,--,--,1.88,1.72
paper birch,--,--,2.27,1.41,2.34,1.28,1.52
black spruce,--,--,--,--,1.05,1.67,1.16
black cottonwood,--,--,5.32,1.81,2.75,2.35,--


BM


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ponderosa pine,0.89,0.61,0.91,0.76,0.73,0.99
grand fir,0.9,0.99,0.98,0.93,0.97,1.01
Douglas-fir,--,0.96,0.82,0.87,0.73,0.83
lodgepole pine,--,0.7,0.5,0.54,0.5,0.83
western juniper,--,--,--,1.0,0.85,1.08
Engelmann spruce,--,0.82,0.8,0.74,0.73,--
western larch,0.78,1.33,0.56,0.63,0.63,--
subalpine fir,--,0.59,0.69,0.6,0.4,0.44
whitebark pine,--,--,1.07,0.33,0.8,0.91
mountain hemlock,--,--,--,--,1.1,1.48


CA


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Douglas-fir,1.31,1.09,1.03,1.05,0.96,1.07
white fir,0.97,0.94,0.91,0.84,0.9,0.73
canyon live oak,1.69,1.34,1.11,1.22,1.18,1.24
ponderosa pine,1.77,0.98,1.0,0.9,0.9,1.09
blue oak,--,--,--,--,--,1.04
California black oak,1.07,1.14,1.14,1.11,1.3,1.48
Pacific madrone,1.44,1.12,1.61,1.73,2.07,2.73
incense-cedar,0.85,1.14,1.48,1.12,1.13,1.14
sugar pine,--,1.02,0.94,0.99,1.13,--
interior live oak,--,2.35,--,1.11,--,1.49


CI


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Douglas-fir,--,0.84,0.96,0.74,0.34,0.25
subalpine fir,--,--,0.82,0.71,0.48,0.38
lodgepole pine,--,--,0.41,0.87,0.55,0.45
ponderosa pine,--,0.94,1.39,0.94,0.53,--
Engelmann spruce,--,0.92,1.31,0.69,0.56,0.15
whitebark pine,--,--,--,2.44,0.59,0.85
grand fir,--,1.28,0.88,1.15,0.53,--
western juniper,--,--,--,1.1,1.18,1.03
curlleaf mountain-mahogany,--,--,--,--,1.91,0.75
limber pine,2.05,--,--,1.04,0.98,0.77


CR


SITECLCD,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ponderosa pine,--,1.09,1.05,0.95,0.61
unknown dead hardwood,--,1.5,1.0,1.0,0.75
Engelmann spruce,0.77,0.67,0.7,0.58,0.71
quaking aspen,--,0.38,0.48,0.47,0.42
Douglas-fir,1.01,0.79,0.65,0.58,0.69
subalpine fir,0.55,0.89,0.79,0.83,0.51
lodgepole pine,--,0.61,0.87,0.66,0.68
white fir,0.55,1.12,0.84,0.59,0.82
corkbark fir,--,0.62,0.98,0.75,0.73
blue spruce,0.83,0.65,0.64,0.4,0.68


CS


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
white oak,0.88,1.31,1.03,1.0,1.02,0.96,0.9
black oak,--,--,1.06,1.04,1.06,1.04,0.99
post oak,--,0.87,0.68,1.08,1.09,1.03,0.98
eastern redcedar,--,--,1.19,1.03,1.1,1.13,1.26
sugar maple,--,1.03,0.94,1.03,1.01,1.0,0.79
shagbark hickory,--,--,0.91,1.02,1.0,1.04,0.76
shortleaf pine,1.57,1.48,1.24,0.92,0.95,1.08,--
hackberry,--,--,1.04,1.01,1.02,0.96,--
American elm,--,1.58,0.97,1.1,1.08,1.06,0.96
silver maple,--,1.1,1.15,1.21,1.44,1.39,0.88


EC


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,1.19,1.44,0.92,0.95,0.9,0.77,0.68
ponderosa pine,--,0.69,0.87,0.95,0.89,0.84,0.84
subalpine fir,--,--,0.61,0.63,0.55,0.49,0.54
grand fir,--,1.77,0.6,0.59,0.59,0.59,--
Pacific silver fir,--,1.55,1.01,0.83,0.86,0.91,1.22
lodgepole pine,--,--,0.92,0.86,0.84,0.62,0.39
western larch,1.26,--,0.99,1.1,0.9,0.99,--
western hemlock,--,1.21,0.86,0.93,0.76,0.76,--
Engelmann spruce,--,1.0,0.98,0.98,0.95,0.93,1.28
mountain hemlock,--,--,1.12,0.9,0.96,0.91,0.83


EM


SITECLCD,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Douglas-fir,0.67,0.81,0.72,0.62
lodgepole pine,--,0.84,0.86,0.79
subalpine fir,0.57,0.94,0.97,0.97
ponderosa pine,0.71,0.91,0.48,0.36
Engelmann spruce,1.07,0.82,0.81,0.31
unknown dead hardwood,--,1.14,1.14,0.82
Rocky Mountain juniper,--,--,1.11,1.02
green ash,--,0.73,0.99,0.87
whitebark pine,--,0.56,0.72,0.58
quaking aspen,--,2.42,1.86,1.2


IE


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Douglas-fir,0.88,0.96,0.98,0.89,0.73,0.33
subalpine fir,--,1.08,0.89,0.91,0.84,0.69
lodgepole pine,0.95,1.06,0.86,0.75,0.63,0.51
grand fir,1.16,1.02,0.93,0.91,1.01,--
Engelmann spruce,--,0.9,0.89,0.92,0.74,--
western redcedar,0.9,0.95,0.87,0.87,0.78,--
western larch,1.0,1.19,1.04,0.9,0.69,0.97
ponderosa pine,0.43,1.19,1.0,0.85,0.81,1.49
western hemlock,1.66,0.93,0.89,0.74,1.01,--
mountain hemlock,--,--,0.72,0.77,0.73,--


LS


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
red maple,1.22,0.97,1.05,1.0,1.01,0.99,0.95
quaking aspen,1.26,1.06,1.06,1.08,1.05,1.04,0.94
sugar maple,--,1.61,1.09,1.04,1.05,1.01,--
northern white-cedar,--,0.48,0.98,1.0,1.05,0.99,0.95
balsam fir,--,1.0,1.08,0.98,1.0,1.0,0.95
black ash,--,--,0.83,1.0,0.99,0.95,0.85
red pine,1.01,1.19,1.11,1.09,1.06,1.07,1.42
black spruce,--,--,0.95,1.06,1.09,1.06,1.27
paper birch,--,1.07,1.05,1.05,1.03,1.03,1.08
American basswood,--,1.58,1.32,1.14,1.1,1.05,1.08


NC


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,1.04,0.89,0.79,0.78,0.69,0.57,0.53
tanoak,0.67,0.65,0.71,0.59,0.48,0.47,0.45
California black oak,1.21,2.01,1.18,0.99,0.96,0.98,0.96
redwood,1.09,1.18,0.86,0.98,0.86,--,0.22
unknown dead hardwood,1.46,1.25,1.05,1.03,0.95,0.51,0.44
Pacific madrone,1.29,0.77,0.85,0.62,0.7,0.4,0.4
white fir,0.7,0.86,1.05,0.77,0.87,0.31,0.47
ponderosa pine,--,0.49,0.61,0.51,0.34,0.22,0.1
unknown dead conifer,1.73,2.02,1.05,0.56,0.79,0.18,0.21
sugar pine,--,--,0.12,0.9,0.57,0.27,0.24


NE


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
red maple,0.72,0.94,0.95,0.95,1.0,0.86
sugar maple,1.27,0.92,0.9,0.95,1.04,0.82
balsam fir,--,0.95,1.11,1.24,1.17,--
eastern hemlock,1.09,0.85,0.91,0.9,1.09,0.92
red spruce,--,1.06,1.35,1.52,1.22,3.08
American beech,1.74,1.02,1.06,1.02,1.18,--
eastern white pine,0.27,0.61,0.72,0.8,0.75,--
yellow birch,--,1.0,1.14,1.1,1.16,--
northern red oak,1.0,1.11,1.15,1.09,1.01,2.08
black cherry,1.08,0.39,0.58,0.64,0.81,0.99


PN


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,0.95,0.94,0.96,0.95,1.03,0.93,0.72
western hemlock,0.78,0.78,0.85,0.94,0.92,0.77,0.84
red alder,0.49,0.73,0.6,0.71,0.76,0.53,0.87
western redcedar,0.71,0.92,0.95,1.02,1.09,0.98,0.51
bigleaf maple,--,0.85,1.1,1.91,2.82,2.35,0.3
Pacific silver fir,--,0.77,0.92,0.78,0.87,0.76,1.28
Sitka spruce,0.72,0.83,0.64,0.61,--,--,0.13
other or unknown live tree,--,0.65,0.78,1.12,1.0,--,0.9
lodgepole pine,--,1.06,--,0.42,0.57,0.9,0.78
grand fir,--,--,0.81,0.79,0.95,--,--


SN


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
loblolly pine,1.17,1.19,1.13,1.16,1.2,1.22,1.07
sweetgum,1.0,0.94,0.96,0.97,0.97,1.0,0.82
red maple,0.8,0.93,0.93,0.93,0.93,0.87,0.61
white oak,0.9,0.93,0.93,0.95,0.92,0.92,0.71
yellow-poplar,0.85,0.96,0.98,0.99,1.0,1.06,--
hickory spp.,0.89,1.0,1.02,1.07,1.05,1.04,0.99
chestnut oak,0.87,0.65,0.92,0.96,0.94,0.91,0.82
water oak,0.73,0.83,0.88,0.9,0.92,0.93,0.81
slash pine,1.51,1.12,1.11,1.2,1.23,1.05,0.7
shortleaf pine,0.64,0.88,0.92,0.98,1.02,1.08,0.81


SO


SITECLCD,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ponderosa pine,1.74,0.98,0.93,0.83,0.82,0.41
lodgepole pine,--,0.32,0.62,0.74,0.92,0.78
white fir,1.7,1.0,0.9,0.89,0.91,1.06
western juniper,--,--,1.2,1.03,0.83,0.97
Shasta red fir,0.94,0.95,0.81,0.83,0.77,0.92
mountain hemlock,--,1.2,0.91,0.99,0.91,0.64
incense-cedar,0.99,1.29,1.06,1.02,0.99,--
Douglas-fir,--,1.04,0.95,0.78,0.89,0.34
sugar pine,--,0.25,0.45,0.48,0.37,--
Oregon white oak,1.67,0.87,0.93,--,1.09,0.73


TT


SITECLCD,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Douglas-fir,0.98,0.94,0.91,0.97
lodgepole pine,0.85,1.17,0.94,0.95
subalpine fir,0.51,0.84,0.92,--
quaking aspen,0.86,1.01,1.12,1.39
Utah juniper,--,--,--,0.96
curlleaf mountain-mahogany,--,--,--,0.71
bigtooth maple,--,--,--,1.57
Engelmann spruce,--,1.06,0.9,--
Rocky Mountain juniper,--,1.04,--,2.94
singleleaf pinyon,--,--,--,0.9


UT


SITECLCD,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
quaking aspen,2.58,1.17,1.19,1.39
subalpine fir,1.28,1.04,0.98,2.09
Douglas-fir,1.15,1.61,1.29,1.1
Engelmann spruce,0.6,0.85,0.83,0.5
white fir,--,1.07,0.96,0.87
lodgepole pine,--,1.07,0.93,0.87
ponderosa pine,--,0.77,0.7,1.26
limber pine,--,0.78,0.86,0.6
blue spruce,--,1.77,1.56,--
narrowleaf cottonwood,--,--,0.36,1.14


WC


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,1.13,0.98,0.99,1.02,1.01,0.93,1.13
western hemlock,0.82,0.95,1.03,0.95,0.95,0.89,1.12
Pacific silver fir,0.89,0.9,0.9,0.95,0.95,0.92,0.99
western redcedar,0.96,1.14,1.08,1.23,1.03,0.96,--
mountain hemlock,--,0.97,1.11,1.06,1.02,0.88,0.79
white fir,--,0.8,0.94,0.92,0.89,0.48,--
red alder,0.93,0.5,0.54,0.41,0.53,0.76,0.75
noble fir,0.57,1.1,1.0,0.98,0.83,0.79,--
bigleaf maple,--,0.73,0.89,1.17,--,2.03,--
incense-cedar,--,0.68,1.01,1.08,1.26,1.01,0.83


WS


SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
white fir,1.4,1.19,1.04,0.97,0.98,0.84,1.29
incense-cedar,0.41,0.77,0.74,0.77,0.59,0.53,0.35
lodgepole pine,--,1.49,1.12,0.94,0.86,0.93,0.93
Jeffrey pine,--,--,0.92,0.95,0.88,0.81,0.95
California red fir,0.96,0.98,1.12,1.04,1.12,1.14,1.03
singleleaf pinyon,--,--,--,0.39,0.28,0.39,0.25
ponderosa pine,1.69,1.01,1.05,0.9,0.77,0.93,2.77
Douglas-fir,1.06,1.32,0.92,0.81,0.81,--,0.54
canyon live oak,--,1.2,0.82,0.98,0.87,0.8,0.69
California black oak,--,0.96,0.84,1.0,1.0,0.75,0.86


In [58]:
# Record counts behind the SITECLCD medians: species in rows, SITECLCD in
# columns, limited to the species currently listed in `abund`.
counts = df.groupby(["TREESIZE", "COMMON_NAME", "SITECLCD"])["SCALEFACTOR"].count()
pivot_counts = (
    counts.xs("LG", level="TREESIZE")
    .reset_index()
    .pivot_table(index="COMMON_NAME", columns="SITECLCD", values="SCALEFACTOR")
)
# Nullable integers keep empty cells as <NA> rather than forcing floats.
pivot_counts.loc[abund.index].astype("Int64")

SITECLCD,1.0,2.0,3.0,4.0,5.0,6.0,7.0
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,14.0,59.0,210.0,190,118,43.0,16.0
tanoak,14.0,31.0,116.0,109,65,17.0,5.0
California black oak,1.0,3.0,30.0,49,44,16.0,115.0
redwood,32.0,48.0,52.0,26,6,,1.0
unknown dead hardwood,9.0,16.0,51.0,32,14,6.0,28.0
Pacific madrone,2.0,7.0,27.0,38,21,5.0,13.0
white fir,1.0,3.0,7.0,22,15,1.0,2.0
ponderosa pine,,1.0,6.0,6,9,9.0,5.0
unknown dead conifer,6.0,1.0,2.0,2,2,7.0,8.0
sugar pine,,,1.0,4,4,12.0,3.0


In [64]:
# Median "LG" SCALEFACTOR per species and LOCATION code, limited to the species
# currently listed in `abund`; missing combinations are rendered as "--".
scalefactors_loc = df.groupby(["TREESIZE", "COMMON_NAME", "LOCATION"])[
    "SCALEFACTOR"
].median()
pivot_loc = (
    scalefactors_loc.xs("LG", level="TREESIZE")
    .reset_index()
    .pivot_table(index="COMMON_NAME", columns="LOCATION", values="SCALEFACTOR")
)
pivot_loc.loc[abund.index].round(2).fillna("--")

LOCATION,505,507,508,510,518,611,705
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,0.75,1.09,0.91,0.79,0.75,0.59,0.79
tanoak,0.72,0.47,0.64,0.58,0.67,0.47,0.84
California black oak,1.16,1.01,0.97,0.99,0.97,0.75,--
redwood,--,0.85,0.93,1.23,--,0.58,--
unknown dead hardwood,1.75,0.92,0.68,1.05,2.11,1.06,2.1
Pacific madrone,0.71,0.78,0.55,0.8,0.88,0.47,0.92
white fir,0.69,--,0.83,0.94,0.9,1.54,--
ponderosa pine,0.1,0.3,0.64,0.34,0.55,0.22,0.36
unknown dead conifer,0.15,2.05,0.29,1.24,--,0.73,--
sugar pine,0.19,--,0.98,0.35,--,0.17,--


In [65]:
# Record counts behind the LOCATION medians: species in rows, LOCATION in
# columns, limited to the species currently listed in `abund`.
scalefactors_loc_count = df.groupby(["TREESIZE", "COMMON_NAME", "LOCATION"])[
    "SCALEFACTOR"
].count()
pivot_loc_count = (
    scalefactors_loc_count.xs("LG", level="TREESIZE")
    .reset_index()
    .pivot_table(index="COMMON_NAME", columns="LOCATION", values="SCALEFACTOR")
)
# Nullable integers keep empty cells as <NA> rather than forcing floats.
pivot_loc_count.loc[abund.index].astype("Int64")

LOCATION,505,507,508,510,518,611,705
COMMON_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Douglas-fir,51.0,8.0,110.0,246.0,18.0,206.0,11.0
tanoak,15.0,13.0,72.0,150.0,2.0,93.0,12.0
California black oak,10.0,37.0,121.0,69.0,6.0,15.0,
redwood,,28.0,70.0,64.0,,3.0,
unknown dead hardwood,2.0,10.0,47.0,53.0,2.0,41.0,1.0
Pacific madrone,10.0,10.0,44.0,34.0,2.0,9.0,4.0
white fir,13.0,,9.0,21.0,6.0,2.0,
ponderosa pine,3.0,1.0,7.0,15.0,4.0,5.0,1.0
unknown dead conifer,5.0,1.0,7.0,11.0,,4.0,
sugar pine,3.0,,3.0,15.0,,3.0,


In [None]:
def weighted_percentile_of_score(x, weights, score, kind="weak"):
    """Return the weighted share of observations that fall below ``score``.

    The result is a fraction between 0 and 1 (not 0-100), so it can be
    formatted directly with ``:.0%``.

    Parameters
    ----------
    x : array-like
        Observed values.
    weights : array-like
        Weight of each observation; same length as ``x``.
    score : scalar
        Value to locate within the weighted distribution of ``x``.
    kind : {"weak", "rank", "strict", "mean"}, default "weak"
        ``"weak"`` counts observations ``<= score``; ``"strict"`` counts
        observations ``< score``; ``"mean"`` averages the two. ``"rank"`` is
        treated as ``"weak"``.

    Returns
    -------
    float
        Weighted fraction, or ``nan`` when the weights sum to zero (which
        includes empty input).

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported options.
    """
    # adapted from https://stackoverflow.com/q/48266788/7638539
    if kind == "rank":  # Equivalent to 'weak' since we have weights.
        kind = "weak"
    if kind not in ("weak", "strict", "mean"):
        # Previously an unknown kind fell through and silently returned None.
        raise ValueError("kind can only be 'rank', 'strict', 'weak' or 'mean'")

    values = np.asarray(x)
    wts = np.asarray(weights)

    total = wts.sum()
    if total == 0:
        # No weight to distribute: the share is undefined.
        return float("nan")

    # Vectorised sums replace the Python-level `sum` over NumPy arrays.
    if kind in ("strict", "mean"):
        strict = wts[values < score].sum() / total
        if kind == "strict":
            return strict

    weak = wts[values <= score].sum() / total
    if kind == "weak":
        return weak

    return (strict + weak) / 2

In [None]:
def _weighted_quantiles(values, weights, probs):
    """Return the weighted quantiles of ``values`` at each probability in ``probs``.

    Each quantile is the smallest value whose cumulative share of the total
    weight reaches the requested probability (inverse of the weighted
    empirical CDF).
    """
    values = np.asarray(values, dtype=float)
    weights = np.asarray(weights, dtype=float)
    order = np.argsort(values)
    cum_share = np.cumsum(weights[order]) / weights.sum()
    idx = np.searchsorted(cum_share, probs, side="left")
    # Guard against rounding leaving the last cumulative share just below 1.
    return values[order][np.minimum(idx, len(values) - 1)]


# Tree-weighted distribution of the "LG" scale factor, one panel per species.
# - `calib` has upper-case column names, so upper-case names are used here.
# - Only matplotlib/numpy are used: the notebook's import cell provides neither
#   seaborn (`sns`) nor statsmodels (`DescrStatsW`).
# - The species list gets its own name so the `spp` reference table loaded from
#   REF_SPECIES is not overwritten.
n_rows, n_cols = 3, 4
bin_width, x_max = 0.05, 3.0

lg_trees = calib.loc[(calib["TREESIZE"] == "LG") & (calib["NUMTREES"] > 0)]
# The grid has a fixed number of panels; keep the species with the most trees
# so that more than n_rows * n_cols species cannot overflow the axes array.
top_species = (
    lg_trees.groupby("SPECIESPLANTS")["NUMTREES"].sum().nlargest(n_rows * n_cols).index
)

fig, axs = plt.subplots(n_rows, n_cols, sharex=True, sharey=True, figsize=(12, 8))
panels = axs.ravel()
bins = np.linspace(0.0, x_max, int(round(x_max / bin_width)) + 1)

for ax, sp in zip(panels, top_species):
    data = lg_trees.loc[lg_trees["SPECIESPLANTS"] == sp]
    n_trees = data["NUMTREES"].sum()
    # Dividing the weights by their total makes bar heights proportions of trees.
    ax.hist(data["SCALEFACTOR"], bins=bins, weights=data["NUMTREES"] / n_trees)
    ax.set_title(f"{sp} (n={n_trees:,.0f})")
    ax.axvline(1.0, lw=1.0, ls=":", color="gray")  # reference line at 1.0
    lo, mid, hi = _weighted_quantiles(
        data["SCALEFACTOR"], data["NUMTREES"], [0.25, 0.5, 0.75]
    )
    ax.axvline(mid, lw=1.0, color="red")  # weighted median
    ax.axvspan(lo, hi, color="red", alpha=0.25)  # weighted interquartile range
    # Share of trees whose scale factor is at or below 1.0.
    pct = weighted_percentile_of_score(data["SCALEFACTOR"], data["NUMTREES"], 1.0)
    ax.text(0.95, 0.90, f"1.0 = {pct:.0%}", transform=ax.transAxes, ha="right")

# Hide panels left over when there are fewer species than grid cells.
for ax in panels[len(top_species):]:
    ax.set_visible(False)

panels[0].set_xlim(0, x_max)  # sharex=True applies the limits to every panel
for ax in axs[-1, :]:
    ax.set_xlabel("SCALEFACTOR")
for ax in axs[:, 0]:
    ax.set_ylabel("Proportion of trees")
plt.tight_layout()
plt.show()