# Final Site Analysis of the Minicubes

This notebook analyses the minicubes by site. The idea is to measure trends and quantiles for a specific site. The analysis is done on the minicubes that were created in the previous notebook.


In [None]:
import xarray as xr
import geopandas as gpd
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.linear_model import LinearRegression, HuberRegressor, RANSACRegressor, TheilSenRegressor
import statsmodels.api as sm

# plotting
import matplotlib.pyplot as plt

Please select the proper site and parameters

In [None]:
# Parameters
# Site number; selects the dataset data/FinalSites/Site{testsite:02}.zarr.
testsite = 2
# Upper deadwood limit for "healthy" pixels: a pixel counts as healthy when
# every deadwood_YYYY layer (2018-2021) is below threshhold * 10000.
threshhold = 0.15
# Lower deadwood limit for "dead" pixels: a pixel counts as dead when every
# deadwood_YYYY layer (2018-2021) is above dead * 10000.
# NOTE(review): the name `dead` is rebound to a DataFrame in the model
# preparation cell further down; re-run this cell before re-running any
# earlier cell that reads the threshold.
dead = 0.2

First some Data processing to create season information and mask the data

In [None]:
ds = xr.open_dataset(f'data/FinalSites/Site{testsite:02}.zarr', engine='zarr')

# Thresholds are given as fractions; scale them by 10000 before comparing
# against the deadwood layers.
# NOTE(review): presumably the deadwood layers store fraction * 10000 - confirm.
threshvalue = threshhold * 10000
deadvalue = dead * 10000

# Years for which a deadwood layer and a growing season are evaluated.
season_years = ['2018', '2019', '2020', '2021']

# A pixel is "healthy" when it is below the threshold in every year and
# "dead" when it is above the dead-threshold in every year.
mask_healthy = np.logical_and.reduce(
    [(ds[f'deadwood_{year}'] < threshvalue).values for year in season_years]
)
mask_dead = np.logical_and.reduce(
    [(ds[f'deadwood_{year}'] > deadvalue).values for year in season_years]
)
ndvi_healthy = ds['ndvi'].where(mask_healthy)
ndvi_dead = ds['ndvi'].where(mask_dead)

# Growing-season NDVI (20 May - 31 Aug) per year, keyed by the year string.
seasons_healthy = {
    year: ndvi_healthy.sel(time=slice(f'{year}-05-20', f'{year}-08-31'))
    for year in season_years
}
seasons_dead = {
    year: ndvi_dead.sel(time=slice(f'{year}-05-20', f'{year}-08-31'))
    for year in season_years
}

# All growing seasons stitched together along time (everything else dropped).
season_only_healthy = xr.concat(
    [seasons_healthy[year] for year in season_years], dim='time'
)
season_only_dead = xr.concat(
    [seasons_dead[year] for year in season_years], dim='time'
)

Generate quantile data for the site

In [None]:
# Seasonal NDVI quantiles per state, stored in one DataArray with dimensions
# (season, quantile, state).
quantile_levels = [0.9, 0.95, 0.99]
quantiles = xr.DataArray(
    np.zeros((4, 3, 2)),
    dims=['season', 'quantile', 'state'],
    coords={'season': ['2018', '2019', '2020', '2021'],
            'quantile': quantile_levels,
            'state': ['healthy', 'deadwood']
    }
)
# Assign through a single dictionary-based .loc call. Assigning through a
# chained `.sel(...).loc[...] = ...` is documented by xarray as unreliable
# because the intermediate selection may be a copy, in which case the write
# is lost without any error.
for state, seasons in (('healthy', seasons_healthy), ('deadwood', seasons_dead)):
    for season, season_ndvi in seasons.items():
        # quantile() without `dim` reduces over all dimensions -> one value per level
        quantiles.loc[dict(season=season, state=state)] = season_ndvi.quantile(quantile_levels).values

Now several plots to explore the data

In [None]:
# Spatial overview: raw NDVI of the first time step next to the deadwood and
# non-deadwood pixel selections.
fig, ax = plt.subplots(1, 3, figsize=(20, 5))
ndvi_panel, dead_panel, healthy_panel = ax

ds['ndvi'].isel(time=0).plot.imshow(cmap='viridis', robust=True, ax=ndvi_panel, add_colorbar=False)
ndvi_panel.set_title(f'NDVI values, {np.datetime_as_string(ds.time[0].values, unit="D")}')

# The two selection maps show where the masked NDVI at time index 10 is > 0.
dead_at_step = ndvi_dead.isel(time=10) > 0
dead_at_step.plot.imshow(cmap = 'Reds', ax=dead_panel, add_colorbar=False)
dead_panel.set_title(f'Deadwood values, threshhold {dead*100}%')

healthy_at_step = ndvi_healthy.isel(time=10) > 0
healthy_at_step.plot.imshow(cmap = 'Greens', ax=healthy_panel, add_colorbar=False)
healthy_panel.set_title(f'non deadwood values, threshhold {threshhold*100}%')

# Strip tick and axis labels from every panel.
for panel in ax:
    panel.set_yticklabels([])
    panel.set_xticklabels([])
    panel.set_xlabel('')
    panel.set_ylabel('')

fig.suptitle(f'Healthy and Deadwood distribution of Site {testsite:02} in corresponding UTM grid')

In [None]:
fig, ax = plt.subplots(figsize=(20, 5))

# Per time step: spatial mean (dashed) and 0.9 quantile (solid);
# healthy pixels in blue, deadwood pixels in magenta.
for ndvi_state, colour in ((ndvi_healthy, 'b'), (ndvi_dead, 'm')):
    ndvi_state.mean(dim=['y', 'x']).plot.line(colour + '--', ax=ax)
    ndvi_state.quantile(0.9, dim=['y', 'x']).plot.line(colour, ax=ax)

ax.set_title(f'NDVI values of Site {testsite:02} accumulated over x/y coordinates')

# Labels follow the order in which the four lines were drawn above.
legend_labels = [
    f'deadwood < {threshhold*100}% (mean)',
    f'deadwood < {threshhold*100}% (0.9 quantile)',
    f'deadwood > {dead*100}% (mean)',
    f'deadwood > {dead*100}% (0.9 quantile)',
]
ax.legend(legend_labels, loc='lower right', title = 'state of the trees', ncol=2)

# Shade and annotate the growing season (20 May - 31 Aug) of every year.
for year in ('2018', '2019', '2020', '2021'):
    season_start = pd.to_datetime(f'{year}-05-20')
    season_end = pd.to_datetime(f'{year}-08-31')
    ax.axvspan(season_start, season_end, color='green', alpha=0.1)
    ax.text(season_start, 0.3, f'growing season\n{year}', rotation=30)

In [None]:
# fig, ax = plt.subplots(figsize=(5, 3))
# quantiles.sel(quantile = 0.9).plot.line(x='season', hue='state', marker = 'o', linestyle = '')
# plt.legend([f'standing deadwood < {threshhold*100}%', f'standing deadwood > {dead*100}%'], loc='center right', title = 'state of the trees in pixel')
# plt.title(f'NDVI trend of site {testsite:02} using the 0.9 quantile of the seasonal data')
# plt.xlabel('Season')

The models need to be selected and the data needs to be prepared for the models

In [None]:
# Flatten the growing-season cubes into long tables (one row per pixel and
# time step), drop rows with missing values and make sure `time` is datetime.
# NOTE(review): `dead` was the deadwood threshold parameter up to this point;
# from here on it refers to this DataFrame.
healthy = (
    season_only_healthy
    .to_dataframe(name='ndvi')
    .reset_index()
    .dropna()
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)
dead = (
    season_only_dead
    .to_dataframe(name='ndvi')
    .reset_index()
    .dropna()
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)

In [None]:
# Per state: timestamps as an (n, 1) column and NDVI as a flat vector.
vec_time_healthy, vec_healthy = healthy['time'].values.reshape(-1, 1), healthy['ndvi'].values
vec_time_dead, vec_dead = dead['time'].values.reshape(-1, 1), dead['ndvi'].values

outlier detection: IQR

In [None]:
def outlier_IQR_remover(vec_time, vec, factor=1.5):
    """Drop samples whose value lies outside the IQR fences.

    A sample is kept when ``Q1 - factor * IQR <= value <= Q3 + factor * IQR``.
    The fences are inclusive, so values lying exactly on a fence are kept and
    data with ``IQR == 0`` (e.g. a constant series) is not wiped out.

    Parameters
    ----------
    vec_time : numpy.ndarray
        Per-sample companion data (here: time stamps), aligned with ``vec``
        along the first axis.
    vec : numpy.ndarray
        One-dimensional sample values from which the quartiles are computed.
    factor : float, default 1.5
        Fence width as a multiple of the inter-quartile range.

    Returns
    -------
    tuple
        ``(vec_time[mask], vec[mask])`` with the outliers removed. Empty
        input is returned unchanged.
    """
    # Quartiles of an empty sample are undefined; there is nothing to filter.
    if np.size(vec) == 0:
        return vec_time, vec

    q1, q3 = np.percentile(vec, [25, 75])
    iqr = q3 - q1
    mask = (vec >= q1 - factor * iqr) & (vec <= q3 + factor * iqr)
    return vec_time[mask], vec[mask]

In [None]:
# vec_time_healthy, vec_healthy = outlier_IQR_remover(vec_time_healthy, vec_healthy)
# vec_time_dead, vec_dead = outlier_IQR_remover(vec_time_dead, vec_dead)

In case you need the time as a numeric value (days since the first observation), use this:

In [None]:
# Express every timestamp as whole days elapsed since the first observation
# of its own series, shaped as an (n, 1) column.
elapsed_healthy = vec_time_healthy - vec_time_healthy.min()
vec_time_healthy_numeric = elapsed_healthy.astype('timedelta64[D]').astype(int).reshape(-1, 1)

elapsed_dead = vec_time_dead - vec_time_dead.min()
vec_time_dead_numeric = elapsed_dead.astype('timedelta64[D]').astype(int).reshape(-1, 1)

In [None]:
# Choose the regression estimator; exactly one line should be active.
# method = LinearRegression() # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
# method = TheilSenRegressor() # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.TheilSenRegressor.html
# method = RANSACRegressor() # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RANSACRegressor.html
method = HuberRegressor() # https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.HuberRegressor.html

# Each state needs its own estimator instance. Binding `method` to both names
# would make them the same object, so fitting the deadwood data would
# overwrite the healthy fit and later predictions from `model_healthy` would
# use the deadwood coefficients. clone() returns a new unfitted estimator
# with the same parameters.
model_healthy = clone(method)
model_healthy.fit(vec_time_healthy_numeric, vec_healthy)
R_healthy = model_healthy.score(vec_time_healthy_numeric, vec_healthy)

model_dead = clone(method)
model_dead.fit(vec_time_dead_numeric, vec_dead)
R_dead = model_dead.score(vec_time_dead_numeric, vec_dead)

print(f'R^2 healthy: {R_healthy}')
print(f'R^2 dead: {R_dead}')

In [None]:
# One panel per state: observed growing-season NDVI with the fitted trend
# line and the R^2 score in a text box.
fig, ax = plt.subplots(1, 2, figsize=(20, 5))

panels = [
    (ax[0], vec_time_healthy, vec_healthy, vec_time_healthy_numeric, model_healthy, R_healthy, 'b', 'Healthy trees'),
    (ax[1], vec_time_dead, vec_dead, vec_time_dead_numeric, model_dead, R_dead, 'm', 'Deadwood trees'),
]
for panel, times, ndvi_values, times_numeric, fitted, r_squared, colour, heading in panels:
    panel.plot(times, ndvi_values, colour)
    panel.plot(times, fitted.predict(times_numeric), 'r--')
    panel.set_title(heading)
    panel.set_xlabel('Time')
    panel.set_ylabel('NDVI')
    panel.set_ylim(0, 1)
    panel.legend(['NDVI', 'Linear Regression'], loc='lower right')
    panel.text(0.7, 0.3, f'R^2: {r_squared:.3f}',
               transform=panel.transAxes, fontsize=12,
               verticalalignment='top',
               bbox = dict(boxstyle='round', facecolor='white', alpha=0.5))

fig.suptitle(f'NDVI trend of site {testsite:02} using the data only from the growing season')