In [1]:
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd
from shapely.geometry import Point
import rasterio
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
from pandas import DataFrame, Series
from matplotlib.pyplot import Axes
from osgeo import gdal

## Data wrangling

- read field sampling data
- remove unwanted columns from the field data
- rename columns to meaningful names
- create a smaller dataset containing just the averaged results

set up lookup tables and helper functions

In [2]:
# Superset of the field-data columns.
# Maps each (truncated) shapefile field name to a two-item list:
#   [short column name used in this notebook, human-readable description].
# The short names replace the shapefile names when the data are renamed and the
# descriptions are registered as axis labels.
# Litter, near-surface and elevated fuels have ten numbered samples each; the
# canopy measurements only appear for samples 1, 3, 5, 7 and 9.
all_cols = {
    'globalid': ['id', 'Unique Site ID'],
    'Site': ['site', 'Site No'],
    'plot': ['plot', 'Plot No'],
    'Date': ['date', 'Date'],
    'Name': ['observer', 'Assessor Name'],
    'Fuel': ['veg_type', 'Vegetation Class'],
    'FireHistor': ['fire_hist', 'Time since Fire (y)'],
    'latitude': ['lat', 'Latitude (°)'],
    'longitude': ['lon', 'Longitude (°)'],
    'horaccmete': ['loc_accuracy', 'Location accuracy (m)'],
    'FuelDepth1': ['litter_d_1', 'Litter Depth: Sample 1 (mm)'],
    'Cover1': ['litter_state_1', 'Litter State: Sample 1 (presence/absence)'],
    'NearSurfac': ['ns_h_1', 'Near surface fuel height: Sample 1 (m)'],
    'NearSurf_1': ['ns_state_1', 'Near surface fuel state: Sample 1 (absent/alive/dead)'],
    'Elevated1': ['elev_h_1', 'Elevated fuel height: Sample 1 (m)'],
    'ElevatedCo': ['elev_state_1', 'Elevated fuel state: Sample 1 (absent/alive/dead)'],
    'CanopyHeig': ['canopy_h_1', 'Canopy height: Sample 1 (m)'],
    'CanopyCove': ['canopy_cov_1', 'Canopy cover: Sample 1 (%)'],
    'FuelDepth2': ['litter_d_2', 'Litter Depth: Sample 2 (mm)'],
    'Cover2': ['litter_state_2', 'Litter State: Sample 2 (presence/absence)'],
    'NearSurf_2': ['ns_h_2', 'Near surface fuel height: Sample 2 (m)'],
    'NearSurf_3': ['ns_state_2', 'Near surface fuel state: Sample 2 (absent/alive/dead)'],
    'Elevated2': ['elev_h_2', 'Elevated fuel height: Sample 2 (m)'],
    'Elevated_1': ['elev_state_2', 'Elevated fuel state: Sample 2 (absent/alive/dead)'],
    'FuelDepth3': ['litter_d_3', 'Litter Depth: Sample 3 (mm)'],
    'Cover3': ['litter_state_3', 'Litter State: Sample 3 (presence/absence)'],
    'NearSurf_4': ['ns_h_3', 'Near surface fuel height: Sample 3 (m)'],
    'NearSurf_5': ['ns_state_3', 'Near surface fuel state: Sample 3 (absent/alive/dead)'],
    'Elevated3': ['elev_h_3', 'Elevated fuel height: Sample 3 (m)'],
    'Elevated_2': ['elev_state_3', 'Elevated fuel state: Sample 3 (absent/alive/dead)'],
    'CanopyHe_1': ['canopy_h_3', 'Canopy height: Sample 3 (m)'],
    'CanopyCo_1': ['canopy_cov_3', 'Canopy cover: Sample 3 (%)'],
    'FuelDepth4': ['litter_d_4', 'Litter Depth: Sample 4 (mm)'],
    'Cover4': ['litter_state_4', 'Litter State: Sample 4 (presence/absence)'],
    'NearSurf_6': ['ns_h_4', 'Near surface fuel height: Sample 4 (m)'],
    'NearSurf_7': ['ns_state_4', 'Near surface fuel state: Sample 4 (absent/alive/dead)'],
    'Elevated4': ['elev_h_4', 'Elevated fuel height: Sample 4 (m)'],
    'Elevated_3': ['elev_state_4', 'Elevated fuel state: Sample 4 (absent/alive/dead)'],
    'FuelDepth5': ['litter_d_5', 'Litter Depth: Sample 5 (mm)'],
    'Cover5': ['litter_state_5', 'Litter State: Sample 5 (presence/absence)'],
    'NearSurf_8': ['ns_h_5', 'Near surface fuel height: Sample 5 (m)'],
    'NearSurf_9': ['ns_state_5', 'Near surface fuel state: Sample 5 (absent/alive/dead)'],
    'Elevated5': ['elev_h_5', 'Elevated fuel height: Sample 5 (m)'],
    'Elevated_4': ['elev_state_5', 'Elevated fuel state: Sample 5 (absent/alive/dead)'],
    'CanopyHe_2': ['canopy_h_5', 'Canopy height: Sample 5 (m)'],
    'CanopyCo_2': ['canopy_cov_5', 'Canopy cover: Sample 5 (%)'],
    'FuelDepth6': ['litter_d_6', 'Litter Depth: Sample 6 (mm)'],
    'Cover6': ['litter_state_6', 'Litter State: Sample 6 (presence/absence)'],
    'NearSur_10': ['ns_h_6', 'Near surface fuel height: Sample 6 (m)'],
    'NearSur_11': ['ns_state_6', 'Near surface fuel state: Sample 6 (absent/alive/dead)'],
    'Elevated6': ['elev_h_6', 'Elevated fuel height: Sample 6 (m)'],
    'Elevated_5':['elev_state_6', 'Elevated fuel state: Sample 6 (absent/alive/dead)'],
    'FuelDepth7': ['litter_d_7', 'Litter Depth: Sample 7 (mm)'],
    'Cover7': ['litter_state_7', 'Litter State: Sample 7 (presence/absence)'],
    'NearSur_12': ['ns_h_7', 'Near surface fuel height: Sample 7 (m)'],
    'NearSur_13': ['ns_state_7', 'Near surface fuel state: Sample 7 (absent/alive/dead)'],
    'Elevated7': ['elev_h_7', 'Elevated fuel height: Sample 7 (m)'],
    'Elevated_6': ['elev_state_7', 'Elevated fuel state: Sample 7 (absent/alive/dead)'],
    'CanopyHe_3': ['canopy_h_7', 'Canopy height: Sample 7 (m)'],
    'CanopyCo_3': ['canopy_cov_7', 'Canopy cover: Sample 7 (%)'],
    'FuelDepth8': ['litter_d_8', 'Litter Depth: Sample 8 (mm)'],
    'Cover8': ['litter_state_8', 'Litter State: Sample 8 (presence/absence)'],
    'NearSur_14': ['ns_h_8', 'Near surface fuel height: Sample 8 (m)'],
    'NearSur_15': ['ns_state_8', 'Near surface fuel state: Sample 8 (absent/alive/dead)'],
    'Elevated8': ['elev_h_8', 'Elevated fuel height: Sample 8 (m)'],
    'Elevated_7': ['elev_state_8', 'Elevated fuel state: Sample 8 (absent/alive/dead)'],
    'FuelDepth9': ['litter_d_9', 'Litter Depth: Sample 9 (mm)'],
    'Cover9': ['litter_state_9', 'Litter State: Sample 9 (presence/absence)'],
    'NearSur_16': ['ns_h_9', 'Near surface fuel height: Sample 9 (m)'],
    'NearSur_17': ['ns_state_9', 'Near surface fuel state: Sample 9 (absent/alive/dead)'],
    'Elevated9': ['elev_h_9', 'Elevated fuel height: Sample 9 (m)'],
    'Elevated_8': ['elev_state_9', 'Elevated fuel state: Sample 9 (absent/alive/dead)'],
    'CanopyHe_4': ['canopy_h_9', 'Canopy height: Sample 9 (m)'],
    'CanopyCo_4': ['canopy_cov_9', 'Canopy cover: Sample 9 (%)'],
    'FuelDept_1': ['litter_d_10', 'Litter Depth: Sample 10 (mm)'],
    'Cover10': ['litter_state_10', 'Litter State: Sample 10 (presence/absence)'],
    'NearSur_18': ['ns_h_10', 'Near surface fuel height: Sample 10 (m)'],
    'NearSur_19': ['ns_state_10', 'Near surface fuel state: Sample 10 (absent/alive/dead)'],
    'Elevated10': ['elev_h_10', 'Elevated fuel height: Sample 10 (m)'],
    'Elevated_9': ['elev_state_10', 'Elevated fuel state: Sample 10 (absent/alive/dead)'],
    # plot-level summaries recorded by the survey app
    'AverageFue': ['litter_d_m', 'Mean litter depth (mm)'],
    'AverageSur': ['litter_state_count', 'Litter cover P/A count (/10)'],
    'AveragePer': ['litter_cov', 'Litter cover (%)'],
    'FuelLoad': ['litter_load', 'Litter fuel load (t/ha)'],
    'Fuel_Hazar': ['s_fhr', 'Surface fuel hazard rating'],
    'averageNSh': ['ns_h_m', 'Mean near surface height (m)'],
    'averageNSc': ['ns_state_count', 'Near Surface P/A count (/10)'],
    'averageN_1': ['ns_cov', 'Near surface cover (%)'],
    'averageNSd': ['ns_dead_count', 'Near surface dead count'],
    'percentage': ['ns_dead_%', 'Near surface dead as percentage of near surface present'],
    # TODO(review): same description as 'Near_Surfa' below -- possibly a repeated
    # field; kept under a leading-underscore name so the two do not collide.
    'nearsur_20': ['_ns_fhr', 'Near surface Fuel Hazard Rating'],
    'Near_Surfa': ['ns_fhr', 'Near surface Fuel Hazard Rating'],
    'nearsur_21': ['ns_load', 'Near surface fuel load (t/ha)'],
    'combined': ['s&ns_fhr', 'Combined surface and near surface Fuel Hazard Rating'],
    'averageele': ['elev_h_m', 'Mean elevated height (m)'],
    'averagee_1': ['elev_state_count', 'Elevated P/A count (/10)'],
    'averagee_2': ['elev_cov', 'elevated cover (%)'],
    'averagee_3': ['elev_dead_count', 'Elevated dead count'],
    'elevatedpe': ['elev_dead_%', 'elevated dead as percentage of elevated present'],
    # TODO(review): same description as 'elevated_h' below -- possibly a repeated
    # field; kept under a leading-underscore name so the two do not collide.
    'elevated_f': ['_elev_fhr', 'Elevated Fuel Hazard Rating'],
    'elevated_h': ['elev_fhr', 'Elevated Fuel Hazard Rating'],
    'elevate_10': ['elev_load', 'Elevated fuel load (t/ha)'],
    'bark_type': ['bark_type', 'Bark type'],
    'bark_fuel': ['bark_haz&type', 'Bark hazard rating and type'],
    'bark_hazar': ['bark_haz', 'Bark hazard rating'],
    'bark_fuell': ['bark_load', 'Bark fuel load (t/ha)'],
    'height_ave': ['canopy_h_m', 'Mean canopy height (m)'],
    'cannopy_av':['canopy_cov_m', 'Mean canopy cover (%)'],
    'plotgood': ['representative', 'Plot is representative of area (Y/N)'],
    'comments': ['comments', 'Comments'],
    'geometry': ['geometry', 'Shapefile geometry'],
}

# Axis labels keyed by column name. Only the AFO raster-derived columns are
# listed here; the field-data descriptions are added to this dict later.
labels = dict(
    afo_cc='AFO Crown Cover %',
    afo_lfd='AFO Ladder Fuel Density',
    afo_ch='AFO Canopy Height (m)',
    afo_cbh='AFO Canopy Base Height (m)',
    afo_litter='AFO litter fuel load (t/ha)',
    afo_surface='AFO surface fuel load (t/ha)',
    afo_elevated='AFO elevated fuel  load (t/ha)',
    afo_bark='AFO bark fuel load (t/ha)',
)

# Working set of columns: plot identifiers plus the per-stratum summary values.
# Individual sample readings and other extraneous columns are left out.
subset = (
    ['id', 'site', 'plot', 'observer', 'veg_type', 'fire_hist', 'loc_accuracy']
    + ['litter_d_m', 'litter_cov', 'litter_load', 's_fhr']       # litter / surface
    + ['ns_h_m', 'ns_cov', 'ns_fhr', 'ns_load']                  # near surface
    + ['s&ns_fhr']                                               # combined rating
    + ['elev_h_m', 'elev_cov', 'elev_fhr', 'elev_load']          # elevated
    + ['bark_type', 'bark_haz', 'bark_load']                     # bark
    + ['canopy_h_m', 'canopy_cov_m']                             # canopy
    + ['representative', 'comments', 'geometry']
)

In [19]:
# helper functions
def read_afo(paths: dict, coord_df: GeoDataFrame, index: int = None, offset: int = 0) -> Series:
    """Sample single- and multi-band AFO geotiffs at the sample points.

    Every raster in `paths` is sampled at every point of `coord_df`; the value
    returned for a point is the row-wise maximum across the rasters.
    NOTE(review): taking the maximum presumably relies on samples that fall
    outside a raster's extent (or on nodata) being smaller than any valid
    value -- confirm against the rasters' nodata settings.

    Args:
        paths (dict): region names mapped to the relative geotiff paths
        coord_df (GeoDataFrame): gdf containing sample coordinates as `geometry`
        index (int, optional): zero-based position of the band to sample in
            multiband data. Defaults to `None`, which samples the first band.
        offset (int, optional): distance, in raster CRS units, subtracted from
            the x coordinate of every point before sampling (y is unchanged).
            Used to test how robust the sampled values are to location.
            Defaults to 0 (no shift).

    Returns:
        Series: sampled data, indexed like `coord_df`
    """
    points = coord_df[['geometry']]
    # Always store scalars: without an explicit band the sampler yields one
    # array per point, which would otherwise end up as array-valued cells.
    band = 0 if index is None else index

    samples = {}
    for loc, path in paths.items():
        with rasterio.open(path, 'r') as src:
            print(f'Input CRS is: {src.crs}')
            # reproject so the point coordinates match the raster grid
            points = points.to_crs(src.crs)
            site_coords = list(
                zip(points['geometry'].x - offset, points['geometry'].y)
            )
            samples[loc] = [values[band] for values in src.sample(site_coords)]

    return DataFrame(samples, index=coord_df.index).max(axis=1)

def read_rfs(path: str, coord_df: GeoDataFrame) -> Series:
    """Sample the first band of an RFS corporate geotiff at the sample points.

    Args:
        path (str): path to the geotiff file
        coord_df (GeoDataFrame): gdf containing sample coordinates as `geometry`

    Returns:
        Series: sampled data (named `sample_data`)
    """
    points = coord_df[['geometry']]
    with rasterio.open(path, 'r') as raster:
        print(f'Input CRS is: {raster.crs}')
        # reproject the points into the raster's CRS before sampling
        points = points.to_crs(raster.crs)
        geometry = points['geometry']
        xy_pairs = list(zip(geometry.x, geometry.y))
        first_band = [pixel[0] for pixel in raster.sample(xy_pairs)]
    points['sample_data'] = first_band
    return points['sample_data']

def reg_plot(x: str, y: str, df: DataFrame, hue: str=None) -> Axes:
    """Scatter plot with a single overall regression line and its statistics.

    The points are coloured by `hue`; the regression line and the annotated
    Pearson r / r squared are calculated over all rows together.

    Args:
        x (str): df column name for x values
        y (str): df column name for y values
        df (DataFrame): data to plot
        hue (str): df column name to use to colour the values. Defaults to `None`

    Returns:
        Axes: the axes holding the scatter, regression line and annotation
    """
    # statistics use only the rows where both variables are present
    paired = df[[x, y]].dropna()
    r = linregress(paired[x], paired[y]).rvalue
    annotation = f'pearson r: {r:.2f} \n r sq: {r**2:.2f}'

    # lmplot draws the coloured scatter only; the fit is added separately so
    # that one line is fitted to all points rather than one line per hue
    scatter = sns.lmplot(data=df, x=x, y=y, hue=hue, fit_reg=False, height=8, aspect=1.5)
    ax = scatter.axes[0, 0]
    sns.regplot(data=df, x=x, y=y, scatter=False, ax=ax)

    ax.text(0.1, 0.9, annotation, transform=ax.transAxes, ha='center', va='center')
    ax.set_xlabel(labels[x])
    ax.set_ylabel(labels[y])

    return ax

def reg_plots(x: str, y: str, df: DataFrame, hue: str=None) -> sns.FacetGrid:
    """Creates a series of linear regression panels with axis labels and
    annotates each panel with descriptive statistics.

    One panel is drawn per distinct non-missing value of `hue`, in order of
    first appearance in `df`. Panels with more than 5 complete (x, y) pairs are
    annotated with Pearson r and r squared.

    Args:
        x (str): df column name for x values
        y (str): df column name for y values
        df (DataFrame): data to plot
        hue (str, optional): df column name to define subsets. Defaults to
            `None`, which draws a single panel containing every row.

    Returns:
        sns.FacetGrid: the grid holding the regression panels
    """
    if hue is None:
        # no grouping column: one panel covering the whole frame
        regressions = sns.FacetGrid(df, height=5, aspect=1.2)
        panels = [(regressions.axes.flat[0], df)]
    else:
        # dropna removes both None and NaN; a list keeps the panel order stable
        # between runs (a set's iteration order is not guaranteed)
        subsets = df[hue].dropna().unique().tolist()
        regressions = sns.FacetGrid(
            df, col=hue, col_order=subsets, hue=hue, height=5, aspect=1.2, col_wrap=2
        )
        panels = [
            (ax, df.loc[df[hue] == subset])
            for ax, subset in zip(regressions.axes.flat, subsets)
        ]

    regressions.map(sns.regplot, x, y)
    regressions.set_ylabels(labels[y])
    regressions.set_xlabels(labels[x])

    for ax, panel_df in panels:
        _df = panel_df[[x, y]].dropna()
        if _df.shape[0] > 5: # make sure _df not too small
            r_value = linregress(_df[x], _df[y]).rvalue
            stats = f'pearson r: {r_value:.2f} \n r sq: {r_value**2:.2f}'
            ax.text(0.15, 0.9, stats, ha='center', va='center', transform=ax.transAxes)

    return regressions

def rasters_resample(path_dict: dict, res: float) -> dict:
    """Resamples geotiffs to resolution = `res` using bilinear interpolation.

    Each output is written next to its input as `<input name>_<res>.tif`.

    Args:
        path_dict (dict): dictionary containing paths to the input geotiffs.
        res (float): resolution in projected units of the resultant geotiffs

    Returns:
        dict: dictionary with the same keys as `path_dict` containing the
            paths to the resampled geotiffs.

    Raises:
        RuntimeError: if GDAL returns no dataset for one of the inputs.
    """
    import os  # local import keeps this helper self-contained

    out_paths = {}
    for key, path in path_dict.items():
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. './data/x.tif') do not truncate the output name
        root, _ext = os.path.splitext(path)
        outpath = f'{root}_{res}.tif'
        dataset = gdal.Translate(
            outpath, path,
            options=f'-of GTiff -tr {res} {res} -r bilinear'
        )
        if dataset is None:
            raise RuntimeError(f'gdal.Translate failed for {path}')
        # dropping the reference closes the dataset and flushes it to disk
        dataset = None
        out_paths[key] = outpath
    return out_paths

read the site and field sampling geodata

In [4]:
# Field sampling records (one row per plot) from the survey shapefile.
field_data_path = 'spatial_data/Field_Data/Fuel_Sampling___Version_0_1.shp'
gdf = gpd.read_file(field_data_path)
# The site sample points are not loaded at present:
# site_gdf = gpd.read_file('spatial_data/Sample_Points.shp')

rename columns to something meaningful (associated with longer description in dictionary)

In [5]:
# Split the lookup table in two: shapefile name -> short name (used to rename
# the columns) and short name -> description (added to the plot labels).
rename_dict = {key: names[0] for key, names in all_cols.items()}
labels.update({names[0]: names[1] for names in all_cols.values()})
gdf = gdf.rename(columns=rename_dict)

the survey app deals poorly with missing data so recalculate means and percentages

In [6]:
quantitative_fields = ['canopy_cov', 'canopy_h', 'elev_h', 'litter_d', 'ns_h']
presence_absence_fields = ['elev_state', 'litter_state', 'ns_state']
# state values that count as the fuel being present at a sample point
present_states = ['alive', 'dead', 'yes']


def sample_columns(field: str) -> list:
    """Return the numbered per-sample columns (`<field>_<n>`) of `gdf`.

    Matching on the numeric suffix keeps the summary columns (`<field>_m`,
    `<field>_count`) out regardless of the column order in the shapefile.
    """
    prefix = f'{field}_'
    return [
        col for col in gdf.columns
        if col.startswith(prefix) and col[len(prefix):].isdigit()
    ]


for field in quantitative_fields:
    cols = sample_columns(field)
    if not cols:
        # sample columns already trimmed away (cell re-run): keep existing means
        continue
    gdf[f'{field}_m'] = gdf[cols].mean(axis=1)

for field in presence_absence_fields:
    cols = sample_columns(field)
    if not cols:
        # sample columns already trimmed away (cell re-run): keep existing counts
        continue
    gdf[f'{field}_count'] = gdf[cols].isin(present_states).sum(axis=1)
    prefix = field.split('_')[0]
    # counts are out of 10 sample points, so x10 converts to a percentage
    gdf[f'{prefix}_cov'] = gdf[f'{field}_count']*10

cut down the number of columns

In [7]:
# Keep only the working columns and order the rows by site then plot.
# sort_values returns a new frame, so later column assignments do not operate
# on a slice of the full table (in-place sorting of a slice can trigger
# SettingWithCopyWarning).
gdf = gdf[subset].sort_values(by=['site', 'plot'])
gdf.head()

Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,elev_fhr,elev_load,bark_type,bark_haz,bark_load,canopy_h_m,canopy_cov_m,representative,comments,geometry
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,,10,Slab,Low,0,6.8,30.0,yes,,POINT (151.28407 -33.58385)
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,High,2,Slab,Low,0,5.2,26.0,,,POINT (151.28377 -33.58408)
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,High,2,Slab,Moderate,0,5.0,16.0,yes,,POINT (151.28358 -33.58432)
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,Moderate,0,Slab,Low,0,11.0,60.0,yes,,POINT (151.04137 -34.16933)
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,Moderate,0,Slab,Low,0,9.8,26.0,yes,,POINT (151.04147 -34.16886)


Shapefile attributes are imported as `object` columns, so they need to be converted to numeric data types before they can be used for correlation and other analyses.

In [8]:
# 'BlanK' marks some missing values in the field data
gdf = gdf.replace('BlanK', np.nan)

# columns holding measurements that should be numbers rather than objects
numeric = [
    'litter_d_m', 'litter_cov', 'litter_load',
    'ns_h_m', 'ns_cov', 'ns_load',
    'elev_h_m', 'elev_cov', 'elev_load',
    'bark_load',
    'canopy_h_m', 'canopy_cov_m',
]

gdf = gdf.assign(**{col: pd.to_numeric(gdf[col]) for col in numeric})
gdf.dtypes

id                  object
site                object
plot                object
observer            object
veg_type            object
fire_hist           object
loc_accuracy       float64
litter_d_m         float64
litter_cov           int64
litter_load        float64
s_fhr               object
ns_h_m             float64
ns_cov               int64
ns_fhr              object
ns_load            float64
s&ns_fhr            object
elev_h_m           float64
elev_cov             int64
elev_fhr            object
elev_load            int64
bark_type           object
bark_haz            object
bark_load            int64
canopy_h_m         float64
canopy_cov_m       float64
representative      object
comments            object
geometry          geometry
dtype: object

In [9]:
# Add region identifiers derived from the site-code prefix.
# The prefixes are applied from lowest to highest priority so that a later
# match overrides an earlier one: 'EC' wins over 'P', which wins over 'SM';
# anything unmatched stays 'other'.
region = np.full(len(gdf), 'other')
for prefix, name in [('SM', 'snowy'), ('P', 'pilliga'), ('EC', 'central')]:
    region = np.where(gdf['site'].str.startswith(prefix), name, region)
gdf['region'] = region
gdf.head()

Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,elev_load,bark_type,bark_haz,bark_load,canopy_h_m,canopy_cov_m,representative,comments,geometry,region
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,10,Slab,Low,0,6.8,30.0,yes,,POINT (151.28407 -33.58385),central
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,2,Slab,Low,0,5.2,26.0,,,POINT (151.28377 -33.58408),central
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,2,Slab,Moderate,0,5.0,16.0,yes,,POINT (151.28358 -33.58432),central
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,0,Slab,Low,0,11.0,60.0,yes,,POINT (151.04137 -34.16933),central
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,0,Slab,Low,0,9.8,26.0,yes,,POINT (151.04147 -34.16886),central


## Sample the AFO geotiffs

It seems that all the geotiffs are projected using the MGA zone 56 CRS (EPSG:32756), regardless of which zone they are in, but let's check this as we read them.

In [10]:
# Crown cover: one raster per region, sampled at every plot location.
cc_paths = dict(
    pilliga='spatial_data/Pilliga-vegetation-canopy_cover-2021.tif',
    central='spatial_data/Centralcoast-vegetation-canopy_cover-2021.tif',
    snowy='spatial_data/Southmnts-vegetation-canopy_cover-2021.tif',
)

gdf['afo_cc'] = read_afo(paths=cc_paths, coord_df=gdf)
gdf.head()

Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756


Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,bark_type,bark_haz,bark_load,canopy_h_m,canopy_cov_m,representative,comments,geometry,region,afo_cc
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,Slab,Low,0,6.8,30.0,yes,,POINT (151.28407 -33.58385),central,50.94128
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,Slab,Low,0,5.2,26.0,,,POINT (151.28377 -33.58408),central,53.012089
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,Slab,Moderate,0,5.0,16.0,yes,,POINT (151.28358 -33.58432),central,36.591705
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,Slab,Low,0,11.0,60.0,yes,,POINT (151.04137 -34.16933),central,82.196129
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,Slab,Low,0,9.8,26.0,yes,,POINT (151.04147 -34.16886),central,73.356422


In [11]:
# Ladder fuel density: one raster per region, sampled at every plot location.
lfd_paths = dict(
    pilliga='spatial_data/Pilliga-vegetation-ladder_fuel_density-2021.tif',
    central='spatial_data/Centralcoast-vegetation-ladder_fuel_density-2021.tif',
    snowy='spatial_data/Southmnts-vegetation-ladder_fuel_density-2021.tif',
)

gdf['afo_lfd'] = read_afo(paths=lfd_paths, coord_df=gdf)
gdf.head()

Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756


Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,bark_haz,bark_load,canopy_h_m,canopy_cov_m,representative,comments,geometry,region,afo_cc,afo_lfd
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,Low,0,6.8,30.0,yes,,POINT (151.28407 -33.58385),central,50.94128,10.934896
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,Low,0,5.2,26.0,,,POINT (151.28377 -33.58408),central,53.012089,8.507341
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,Moderate,0,5.0,16.0,yes,,POINT (151.28358 -33.58432),central,36.591705,15.579016
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,Low,0,11.0,60.0,yes,,POINT (151.04137 -34.16933),central,82.196129,3.869198
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,Low,0,9.8,26.0,yes,,POINT (151.04147 -34.16886),central,73.356422,7.731657


In [12]:
# Canopy height: one raster per region, sampled at every plot location.
ch_paths = dict(
    pilliga='spatial_data/Pilliga-vegetation-canopy_height-2021.tif',
    central='spatial_data/Centralcoast-vegetation-canopy_height-2021.tif',
    snowy='spatial_data/Southmnts-vegetation-canopy_height-2021.tif',
)

gdf['afo_ch'] = read_afo(paths=ch_paths, coord_df=gdf)
gdf.head()

Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756


Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,bark_load,canopy_h_m,canopy_cov_m,representative,comments,geometry,region,afo_cc,afo_lfd,afo_ch
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,0,6.8,30.0,yes,,POINT (151.28407 -33.58385),central,50.94128,10.934896,5.625101
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,0,5.2,26.0,,,POINT (151.28377 -33.58408),central,53.012089,8.507341,6.257932
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,0,5.0,16.0,yes,,POINT (151.28358 -33.58432),central,36.591705,15.579016,3.823408
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,0,11.0,60.0,yes,,POINT (151.04137 -34.16933),central,82.196129,3.869198,9.999367
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,0,9.8,26.0,yes,,POINT (151.04147 -34.16886),central,73.356422,7.731657,5.684369


In [13]:
# Canopy base height: one raster per region, sampled at every plot location.
cbh_paths = dict(
    pilliga='spatial_data/Pilliga-vegetation-canopy_base_height-2021.tif',
    central='spatial_data/Centralcoast-vegetation-canopy_base_height-2021.tif',
    snowy='spatial_data/Southmnts-vegetation-canopy_base_height-2021.tif',
)

gdf['afo_cbh'] = read_afo(paths=cbh_paths, coord_df=gdf)
gdf.head()

Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756


Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,canopy_h_m,canopy_cov_m,representative,comments,geometry,region,afo_cc,afo_lfd,afo_ch,afo_cbh
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,6.8,30.0,yes,,POINT (151.28407 -33.58385),central,50.94128,10.934896,5.625101,2.781175
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,5.2,26.0,,,POINT (151.28377 -33.58408),central,53.012089,8.507341,6.257932,3.228979
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,5.0,16.0,yes,,POINT (151.28358 -33.58432),central,36.591705,15.579016,3.823408,3.203048
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,11.0,60.0,yes,,POINT (151.04137 -34.16933),central,82.196129,3.869198,9.999367,4.333815
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,9.8,26.0,yes,,POINT (151.04147 -34.16886),central,73.356422,7.731657,5.684369,2.840926


In [14]:
# Fuel loads: a multiband raster per region, with one band per stratum.
fuel_load_paths = dict(
    pilliga='spatial_data/Pilliga-fuels-classes-2021-density.tif',
    central='spatial_data/Centralcoast-fuels-classes-2021-density.tif',
    snowy='spatial_data/Southmnts-fuels-classes-2021-density.tif',
)

# output column for each band, listed in band order
strata = ['afo_litter', 'afo_surface', 'afo_elevated', 'afo_bark']

for band in range(len(strata)):
    gdf[strata[band]] = read_afo(fuel_load_paths, gdf, index=band)

gdf.head()

Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756
Input CRS is: EPSG:32756


Unnamed: 0,id,site,plot,observer,veg_type,fire_hist,loc_accuracy,litter_d_m,litter_cov,litter_load,...,geometry,region,afo_cc,afo_lfd,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,DSF,2.0,4.658635,19.0,100,5.0,...,POINT (151.28407 -33.58385),central,50.94128,10.934896,5.625101,2.781175,17.71669,17.71669,1.855503,3.123432
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,DSF,2.0,4.646899,11.0,40,2.5,...,POINT (151.28377 -33.58408),central,53.012089,8.507341,6.257932,3.228979,17.689646,17.689646,1.800139,3.030236
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,DSF,2.0,4.675196,11.0,10,1.25,...,POINT (151.28358 -33.58432),central,36.591705,15.579016,3.823408,3.203048,17.148125,17.148125,1.23049,2.071327
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,DSF,,4.668194,24.0,80,5.0,...,POINT (151.04137 -34.16933),central,82.196129,3.869198,9.999367,4.333815,14.651359,16.660486,6.41348,3.069542
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,DSF,,4.658071,9.0,50,2.5,...,POINT (151.04147 -34.16886),central,73.356422,7.731657,5.684369,2.840926,14.50838,16.497902,5.340499,2.556005


trim the dataframe down again

In [None]:
# list every column currently in the frame before trimming it
print(gdf.columns.tolist())

In [15]:
# Final working set: identifiers, field summaries and the sampled AFO values.
# Every column is listed exactly once; repeating a name in the selection would
# create duplicate column labels, after which gdf['ns_fhr'] / gdf['elev_fhr']
# return a two-column frame instead of a Series.
gdf = gdf[[
    'id', 'site', 'plot', 'observer', 'region', 'geometry', 'veg_type', 'fire_hist', 'loc_accuracy',
    'litter_d_m', 'litter_cov', 'litter_load', 's_fhr',
    'ns_h_m', 'ns_cov', 'ns_fhr', 'ns_load', 's&ns_fhr',
    'elev_h_m', 'elev_cov', 'elev_fhr', 'elev_load',
    'bark_type', 'bark_haz', 'bark_load',
    'canopy_h_m', 'canopy_cov_m',
    'representative',
    'afo_cc', 'afo_lfd', 'afo_ch', 'afo_cbh', 'afo_litter', 'afo_surface', 'afo_elevated', 'afo_bark'
]]

gdf.head()

Unnamed: 0,id,site,plot,observer,region,geometry,veg_type,fire_hist,loc_accuracy,litter_d_m,...,canopy_cov_m,representative,afo_cc,afo_lfd,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,central,POINT (151.28407 -33.58385),DSF,2.0,4.658635,19.0,...,30.0,yes,50.94128,10.934896,5.625101,2.781175,17.71669,17.71669,1.855503,3.123432
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,central,POINT (151.28377 -33.58408),DSF,2.0,4.646899,11.0,...,26.0,,53.012089,8.507341,6.257932,3.228979,17.689646,17.689646,1.800139,3.030236
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,central,POINT (151.28358 -33.58432),DSF,2.0,4.675196,11.0,...,16.0,yes,36.591705,15.579016,3.823408,3.203048,17.148125,17.148125,1.23049,2.071327
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,central,POINT (151.04137 -34.16933),DSF,,4.668194,24.0,...,60.0,yes,82.196129,3.869198,9.999367,4.333815,14.651359,16.660486,6.41348,3.069542
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,central,POINT (151.04147 -34.16886),DSF,,4.658071,9.0,...,26.0,yes,73.356422,7.731657,5.684369,2.840926,14.50838,16.497902,5.340499,2.556005


In [16]:
# Pearson correlation between every pair of numeric columns.
# The numeric columns are selected explicitly: newer pandas versions no longer
# drop the object and geometry columns silently and raise instead.
df_corr = gdf.select_dtypes(include='number').corr(method='pearson')
df_corr

Unnamed: 0,loc_accuracy,litter_d_m,litter_cov,litter_load,ns_h_m,ns_cov,ns_load,elev_h_m,elev_cov,elev_load,...,canopy_h_m,canopy_cov_m,afo_cc,afo_lfd,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark
loc_accuracy,1.0,0.112384,-0.003259,-0.04323,-0.067725,0.081307,-0.013285,0.103546,0.056404,-0.012017,...,0.168271,0.127381,-0.026431,0.015386,0.065074,0.013533,0.147237,0.138038,0.015218,0.166415
litter_d_m,0.112384,1.0,0.317014,0.91946,0.224079,0.1382,0.188182,0.280819,0.143506,0.124034,...,0.099584,0.585697,0.35317,-0.14971,0.495831,0.193215,0.490384,0.508517,0.404072,0.690024
litter_cov,-0.003259,0.317014,1.0,0.362295,0.001048,0.264071,0.014278,0.026849,0.103379,0.003211,...,0.180186,0.331209,0.020424,-0.102912,0.244053,0.164425,0.097956,0.1207,0.09009,0.121293
litter_load,-0.04323,0.91946,0.362295,1.0,0.239996,0.167273,0.188283,0.222281,0.103818,0.053984,...,0.056841,0.508947,0.258593,-0.101412,0.411155,0.165026,0.408429,0.426184,0.323718,0.568674
ns_h_m,-0.067725,0.224079,0.001048,0.239996,1.0,0.553662,0.455636,0.460361,0.383685,0.120231,...,-0.222611,-0.176827,-0.074975,0.103687,-0.129795,-0.187325,0.080328,0.082085,0.087025,0.052253
ns_cov,0.081307,0.1382,0.264071,0.167273,0.553662,1.0,0.214662,0.314499,0.383645,-0.024871,...,0.100558,-0.038091,-0.184162,0.1643,0.034459,0.053096,0.081935,0.06587,-0.148173,-0.035989
ns_load,-0.013285,0.188182,0.014278,0.188283,0.455636,0.214662,1.0,0.559535,0.388701,0.592559,...,-0.177404,-0.031426,0.006199,0.118763,-0.184981,-0.201827,-0.023631,0.014287,0.180432,-0.001745
elev_h_m,0.103546,0.280819,0.026849,0.222281,0.460361,0.314499,0.559535,1.0,0.843357,0.557096,...,-0.048543,0.140463,0.16049,0.212856,0.098199,-0.047572,0.198271,0.223944,0.298951,0.312944
elev_cov,0.056404,0.143506,0.103379,0.103818,0.383685,0.383645,0.388701,0.843357,1.0,0.457202,...,0.059437,0.0737,0.084107,0.255311,0.059883,-0.063541,0.113997,0.14306,0.17804,0.221774
elev_load,-0.012017,0.124034,0.003211,0.053984,0.120231,-0.024871,0.592559,0.557096,0.457202,1.0,...,-0.014543,0.063671,0.01292,0.182625,-0.046872,-0.117747,0.057657,0.075452,0.163522,0.069148


just grab the rows and columns we are interested in

In [17]:
# columns sampled from the AFO rasters
afo_keys = (
    ['afo_cc', 'afo_lfd', 'afo_ch', 'afo_cbh']
    + ['afo_litter', 'afo_surface', 'afo_elevated', 'afo_bark']
)

# plot-level summaries measured in the field
field_keys = (
    ['litter_d_m', 'litter_cov', 'litter_load']
    + ['ns_h_m', 'ns_cov', 'ns_load']
    + ['elev_h_m', 'elev_cov', 'elev_load']
    + ['bark_load']
    + ['canopy_h_m', 'canopy_cov_m']
)

# rows = field measurements, columns = AFO products
field_vs_afo = df_corr.loc[field_keys, afo_keys]
field_vs_afo

Unnamed: 0,afo_cc,afo_lfd,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark
litter_d_m,0.35317,-0.14971,0.495831,0.193215,0.490384,0.508517,0.404072,0.690024
litter_cov,0.020424,-0.102912,0.244053,0.164425,0.097956,0.1207,0.09009,0.121293
litter_load,0.258593,-0.101412,0.411155,0.165026,0.408429,0.426184,0.323718,0.568674
ns_h_m,-0.074975,0.103687,-0.129795,-0.187325,0.080328,0.082085,0.087025,0.052253
ns_cov,-0.184162,0.1643,0.034459,0.053096,0.081935,0.06587,-0.148173,-0.035989
ns_load,0.006199,0.118763,-0.184981,-0.201827,-0.023631,0.014287,0.180432,-0.001745
elev_h_m,0.16049,0.212856,0.098199,-0.047572,0.198271,0.223944,0.298951,0.312944
elev_cov,0.084107,0.255311,0.059883,-0.063541,0.113997,0.14306,0.17804,0.221774
elev_load,0.01292,0.182625,-0.046872,-0.117747,0.057657,0.075452,0.163522,0.069148
bark_load,-0.037023,-0.030943,0.325142,0.195205,0.096099,0.093692,-0.094156,0.167462


Plot pairs of variables that should correlate.

Note: ladder fuel density (`afo_lfd`) does not seem to be well correlated with anything :(

In [None]:
# afo_cc (x) against the field column canopy_cov_m (y).
x, y, df = 'afo_cc', 'canopy_cov_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_ch (x) against the field column canopy_h_m (y).
x, y, df = 'afo_ch', 'canopy_h_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_cbh (x) against the field column canopy_h_m (y).
x, y, df = 'afo_cbh', 'canopy_h_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_litter (x) against the field column litter_d_m (y).
x, y, df = 'afo_litter', 'litter_d_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_litter (x) against the field column litter_load (y).
x, y, df = 'afo_litter', 'litter_load', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# Two AFO layers against each other: afo_litter (x) and afo_surface (y).
x, y, df = 'afo_litter', 'afo_surface', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_surface (x) against the field column litter_d_m (y).
x, y, df = 'afo_surface', 'litter_d_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_elevated (x) against the field column elev_cov (y).
x, y, df = 'afo_elevated', 'elev_cov', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# afo_bark (x) against the field column canopy_cov_m (y).
# NOTE(review): this pairs the AFO bark layer with field canopy cover; the
# field column 'bark_load' may have been intended for y -- confirm.
x, y, df = 'afo_bark', 'canopy_cov_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

Poor correlations may be due to the fine-scale and variable nature of the AFO data. Test how robust the results are to location by sampling with an offset of 20 m.

In [None]:
# Sample the canopy-cover rasters again, this time passing offset=20 to
# read_afo, and store the result next to the un-shifted 'afo_cc' column.
gdf['afo_cc_off'] = read_afo(
    cc_paths,
    gdf,
    offset=20,
)
# Plot label for the new column.
labels['afo_cc_off'] = 'AFO Canopy Cover offset 20 m'

gdf.head(n=5)

In [None]:
# afo_cc (x) against its offset-sampled counterpart afo_cc_off (y).
x, y, df = 'afo_cc', 'afo_cc_off', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

If the data are this sensitive, let's resample them. After some experimentation, it seems that for most of the AFO datasets the greatest improvement in r comes from resampling to a grid cell size of 30 m.

In [None]:
# Target grid cell size (m) for the resampled rasters; later cells reuse `res`.
res = 30

# Resample the canopy-cover rasters, then sample the resampled rasters at the
# field sites into a new 'afo_cc_lr' column.
lr_cc_paths = rasters_resample(cc_paths, res)
gdf['afo_cc_lr'] = read_afo(lr_cc_paths, gdf)
# Plot label for the new column.
labels['afo_cc_lr'] = f'AFO Canopy Cover (%) {res} m resolution'

gdf.head(n=5)

In [None]:
# Resampled afo_cc_lr (x) against the field column canopy_cov_m (y).
x, y, df = 'afo_cc_lr', 'canopy_cov_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

In [None]:
# Resample the fuel-load rasters to `res` (set in an earlier cell) and sample
# them at the field sites into a new column.
label = 'afo_surface_lr'
paths = rasters_resample(fuel_load_paths, res)
# NOTE(review): index=1 presumably selects the surface layer of the fuel-load
# raster -- confirm against read_afo.
gdf[label] = read_afo(paths, gdf, index=1)
labels[label] = f'AFO Surface Cover {res} m resolution'

# Resampled column (x) against the field column litter_d_m (y).
x, y, df = label, 'litter_d_m', gdf

reg_plot(x, y, df, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, df, hue=hue)
    plt.show()

## RFS Corporate Data

In [18]:
# NOTE(review): this cell holds only a commented-out definition of `read_rfs`,
# yet the following cell calls `read_rfs`. Presumably a live definition exists
# elsewhere in the notebook -- confirm, then either delete this cell or restore
# it so the notebook survives Restart Kernel -> Run All.
#
# What the disabled code does: keeps only the geometry column, reprojects it
# to the raster's CRS, samples the raster at each site's (x, y) coordinate,
# keeps the first value of each sample, and returns those values as a Series.

# def read_rfs(path: str, coord_df: GeoDataFrame) -> Series:

#     coord_df = coord_df[['geometry']]
#     with rasterio.open(path, 'r') as src:
#         print(f'Input CRS is: {src.crs}')
#         coord_df = coord_df.to_crs(src.crs)
#         site_coords = [
#             (x,y) for x,y in zip(coord_df['geometry'].x, coord_df['geometry'].y)
#         ]
#         coord_df['sample_data'] = [x[0] for x in src.sample(site_coords)]
#     return coord_df['sample_data']

In [20]:
# Sample each RFS fuel raster at the field sites. Every layer becomes an
# `rfs_<layer>` column on gdf and gets a matching entry in `labels`.
rfs_rasters = {
    'bark': 'Bark_FuelTypeV211_202109201.tif',
    'canopy': 'Canopy_FuelTypeV2111.tif',
    'elevated': 'Elevated_FuelTypeV211_202109201.tif',
    'surface': 'Surface_FuelTypeV211_202109201.tif',
}

for layer, filename in rfs_rasters.items():
    path = f'spatial_data/RFS/{filename}'
    column = f'rfs_{layer}'
    gdf[column] = read_rfs(path, gdf)
    # The stamp in the label matches the one in the raster file names
    # (202109201); the labels previously dropped its leading digit.
    # NOTE(review): the canopy file name carries no stamp; its label keeps the
    # same stamp as the other layers, as before -- confirm.
    labels[column] = f'RFS {layer.capitalize()} Fuel Load 202109201'

gdf.head()

Input CRS is: EPSG:3308
Input CRS is: EPSG:3308
Input CRS is: EPSG:3308
Input CRS is: EPSG:3308


Unnamed: 0,id,site,plot,observer,region,geometry,veg_type,fire_hist,loc_accuracy,litter_d_m,...,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark,rfs_bark,rfs_canopy,rfs_elevated,rfs_surface
16,ccfa3f44-ca62-46fd-bc88-e41f28247cd3,EC10,1,Laurence McCoy,central,POINT (151.28407 -33.58385),DSF,2.0,4.658635,19.0,...,5.625101,2.781175,17.71669,17.71669,1.855503,3.123432,1.355535,9.6,0.364655,6.020303
25,4d19111d-9db7-4087-9fff-627dbf92f9f4,EC10,2,Laurence McCoy,central,POINT (151.28377 -33.58408),DSF,2.0,4.646899,11.0,...,6.257932,3.228979,17.689646,17.689646,1.800139,3.030236,1.355535,9.6,0.364655,6.020303
27,9f8027da-efe2-4dbd-a281-9685e01b3fc6,EC10,3,Laurence McCoy,central,POINT (151.28358 -33.58432),DSF,2.0,4.675196,11.0,...,3.823408,3.203048,17.148125,17.148125,1.23049,2.071327,1.355535,9.6,0.364655,6.020303
34,f4731772-5879-4fa8-b69f-9f0fe8e53adc,EC11,2,Derek Meaghan,central,POINT (151.04137 -34.16933),DSF,,4.668194,24.0,...,9.999367,4.333815,14.651359,16.660486,6.41348,3.069542,1.414082,3.5,3.012818,8.95273
39,399e8689-019a-4776-a6b7-8186c7cdc404,EC11,3,Meaghan Derek,central,POINT (151.04147 -34.16886),DSF,,4.658071,9.0,...,5.684369,2.840926,14.50838,16.497902,5.340499,2.556005,1.414082,3.5,3.012818,8.95273


In [21]:
# Pearson correlation between every pair of numeric columns.
# Non-numeric columns (ids, observer names, geometry, ...) are dropped
# explicitly: pandas >= 2.0 no longer discards them silently inside
# DataFrame.corr and raises instead.
df_corr = gdf.select_dtypes(include='number').corr(method='pearson')
df_corr

Unnamed: 0,loc_accuracy,litter_d_m,litter_cov,litter_load,ns_h_m,ns_cov,ns_load,elev_h_m,elev_cov,elev_load,...,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark,rfs_bark,rfs_canopy,rfs_elevated,rfs_surface
loc_accuracy,1.0,0.112384,-0.003259,-0.04323,-0.067725,0.081307,-0.013285,0.103546,0.056404,-0.012017,...,0.065074,0.013533,0.147237,0.138038,0.015218,0.166415,0.090835,0.078571,-0.004282,0.119698
litter_d_m,0.112384,1.0,0.317014,0.91946,0.224079,0.1382,0.188182,0.280819,0.143506,0.124034,...,0.495831,0.193215,0.490384,0.508517,0.404072,0.690024,0.619953,0.133364,0.121601,0.407038
litter_cov,-0.003259,0.317014,1.0,0.362295,0.001048,0.264071,0.014278,0.026849,0.103379,0.003211,...,0.244053,0.164425,0.097956,0.1207,0.09009,0.121293,0.146062,0.082477,0.01388,0.241863
litter_load,-0.04323,0.91946,0.362295,1.0,0.239996,0.167273,0.188283,0.222281,0.103818,0.053984,...,0.411155,0.165026,0.408429,0.426184,0.323718,0.568674,0.511792,0.093358,0.113079,0.344984
ns_h_m,-0.067725,0.224079,0.001048,0.239996,1.0,0.553662,0.455636,0.460361,0.383685,0.120231,...,-0.129795,-0.187325,0.080328,0.082085,0.087025,0.052253,0.015352,-0.18509,0.129139,-0.083556
ns_cov,0.081307,0.1382,0.264071,0.167273,0.553662,1.0,0.214662,0.314499,0.383645,-0.024871,...,0.034459,0.053096,0.081935,0.06587,-0.148173,-0.035989,-0.102012,0.237846,-0.143601,-0.091579
ns_load,-0.013285,0.188182,0.014278,0.188283,0.455636,0.214662,1.0,0.559535,0.388701,0.592559,...,-0.184981,-0.201827,-0.023631,0.014287,0.180432,-0.001745,0.079989,-0.220701,0.352439,0.163403
elev_h_m,0.103546,0.280819,0.026849,0.222281,0.460361,0.314499,0.559535,1.0,0.843357,0.557096,...,0.098199,-0.047572,0.198271,0.223944,0.298951,0.312944,0.282233,0.019912,0.271427,0.104055
elev_cov,0.056404,0.143506,0.103379,0.103818,0.383685,0.383645,0.388701,0.843357,1.0,0.457202,...,0.059883,-0.063541,0.113997,0.14306,0.17804,0.221774,0.169052,0.17003,0.113547,-0.053214
elev_load,-0.012017,0.124034,0.003211,0.053984,0.120231,-0.024871,0.592559,0.557096,0.457202,1.0,...,-0.046872,-0.117747,0.057657,0.075452,0.163522,0.069148,0.106926,-0.059344,0.268345,0.139048


In [22]:
# List every column that made it into the correlation matrix.
df_corr.columns.to_numpy()

array(['loc_accuracy', 'litter_d_m', 'litter_cov', 'litter_load',
       'ns_h_m', 'ns_cov', 'ns_load', 'elev_h_m', 'elev_cov', 'elev_load',
       'bark_load', 'canopy_h_m', 'canopy_cov_m', 'afo_cc', 'afo_lfd',
       'afo_ch', 'afo_cbh', 'afo_litter', 'afo_surface', 'afo_elevated',
       'afo_bark', 'rfs_bark', 'rfs_canopy', 'rfs_elevated',
       'rfs_surface'], dtype=object)

In [23]:
# Columns sampled from the RFS rasters, one per fuel layer.
rfs_keys = [
    f'rfs_{layer}'
    for layer in ('bark', 'canopy', 'elevated', 'surface')
]

In [24]:
# Field measurements (rows) against AFO layers (columns).
df_corr.loc[field_keys].loc[:, afo_keys]

Unnamed: 0,afo_cc,afo_lfd,afo_ch,afo_cbh,afo_litter,afo_surface,afo_elevated,afo_bark
litter_d_m,0.35317,-0.14971,0.495831,0.193215,0.490384,0.508517,0.404072,0.690024
litter_cov,0.020424,-0.102912,0.244053,0.164425,0.097956,0.1207,0.09009,0.121293
litter_load,0.258593,-0.101412,0.411155,0.165026,0.408429,0.426184,0.323718,0.568674
ns_h_m,-0.074975,0.103687,-0.129795,-0.187325,0.080328,0.082085,0.087025,0.052253
ns_cov,-0.184162,0.1643,0.034459,0.053096,0.081935,0.06587,-0.148173,-0.035989
ns_load,0.006199,0.118763,-0.184981,-0.201827,-0.023631,0.014287,0.180432,-0.001745
elev_h_m,0.16049,0.212856,0.098199,-0.047572,0.198271,0.223944,0.298951,0.312944
elev_cov,0.084107,0.255311,0.059883,-0.063541,0.113997,0.14306,0.17804,0.221774
elev_load,0.01292,0.182625,-0.046872,-0.117747,0.057657,0.075452,0.163522,0.069148
bark_load,-0.037023,-0.030943,0.325142,0.195205,0.096099,0.093692,-0.094156,0.167462


In [25]:
# Field measurements (rows) against RFS layers (columns).
df_corr.loc[field_keys].loc[:, rfs_keys]

Unnamed: 0,rfs_bark,rfs_canopy,rfs_elevated,rfs_surface
litter_d_m,0.619953,0.133364,0.121601,0.407038
litter_cov,0.146062,0.082477,0.01388,0.241863
litter_load,0.511792,0.093358,0.113079,0.344984
ns_h_m,0.015352,-0.18509,0.129139,-0.083556
ns_cov,-0.102012,0.237846,-0.143601,-0.091579
ns_load,0.079989,-0.220701,0.352439,0.163403
elev_h_m,0.282233,0.019912,0.271427,0.104055
elev_cov,0.169052,0.17003,0.113547,-0.053214
elev_load,0.106926,-0.059344,0.268345,0.139048
bark_load,0.157634,0.368705,-0.176854,0.177658


In [26]:
# AFO layers (rows) against RFS layers (columns).
df_corr.loc[afo_keys].loc[:, rfs_keys]

Unnamed: 0,rfs_bark,rfs_canopy,rfs_elevated,rfs_surface
afo_cc,0.543068,-0.268005,0.515086,0.509988
afo_lfd,-0.167509,-0.019287,0.017058,-0.278017
afo_ch,0.471689,0.293119,0.001693,0.379728
afo_cbh,0.104881,0.407296,-0.264039,0.091228
afo_litter,0.311204,-0.140179,0.491073,0.57298
afo_surface,0.361935,-0.169445,0.550203,0.613967
afo_elevated,0.591552,-0.499266,0.784142,0.677641
afo_bark,0.821023,0.090864,0.241495,0.478344


In [None]:
# afo_bark (x) against rfs_bark (y).
x, y = 'afo_bark', 'rfs_bark'

reg_plot(x, y, gdf, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, gdf, hue=hue)
    plt.show()

In [None]:
# afo_elevated (x) against rfs_elevated (y).
x, y = 'afo_elevated', 'rfs_elevated'

reg_plot(x, y, gdf, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, gdf, hue=hue)
    plt.show()

In [None]:
# afo_surface (x) against rfs_surface (y), region grouping only.
# NOTE(review): the next cell repeats this comparison and adds the veg_type
# plot, so this cell looks redundant -- confirm whether a different pair of
# variables was intended here.
x, y = 'afo_surface', 'rfs_surface'

reg_plot(x, y, gdf, hue='region')
plt.show()
reg_plots(x, y, gdf, hue='region')
plt.show()

In [None]:
# afo_surface (x) against rfs_surface (y).
x, y = 'afo_surface', 'rfs_surface'

reg_plot(x, y, gdf, hue='region')
plt.show()
# reg_plots once per grouping column.
for hue in ('region', 'veg_type'):
    reg_plots(x, y, gdf, hue=hue)
    plt.show()

In [27]:
# Write the combined field / AFO / RFS table to a CSV file in the working directory.
gdf.to_csv(path_or_buf='AFO_truthiness.csv')