# Model Evaluation Plots
Create plots to visualize model predictions in various ways.

In [None]:
import ast
from pathlib import Path

import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from matplotlib import colors
from sklearn import metrics

import geopandas as gpd
from shapely.geometry import Polygon
import contextily as cx
import warnings
warnings.filterwarnings('ignore')

In [None]:
def set_index_variables(df):
    """Add vegetation-index columns to ``df`` in place and return ``df``.

    All indices are computed from the per-sample band means, so ``df`` must
    already contain the columns ``B2_mean``, ``B3_mean``, ``B4_mean`` and
    ``B8_mean``.  Columns are added in this order: ``ndvi``, ``gndvi``,
    ``evi``, ``sr``, ``msr``, ``savi``, ``ctvi``, ``ttvi``, ``rvi``,
    ``nrvi``, ``ipvi``, ``osavi``, ``tndvi``, ``grvi``, ``arvi``.

    NOTE(review): the ``evi``, ``msr`` and ``savi`` expressions below do not
    look like the commonly published forms of those indices.  They are kept
    exactly as they were because the saved model was presumably trained on
    features computed this way -- confirm before changing any of them.
    """
    b2 = df['B2_mean']
    b3 = df['B3_mean']
    b4 = df['B4_mean']
    b8 = df['B8_mean']

    # Quantities shared by several indices.
    ndvi = (b8 - b4) / (b8 + b4)
    ndvi_shifted = ndvi + 0.5
    simple_ratio = b8 / b4
    inverse_ratio = b4 / b8
    arvi_term = 2 * b4 - b2

    df['ndvi'] = ndvi
    df['gndvi'] = (b8 - b3) / (b8 + b3)
    df['evi'] = 2.5 * ((b8 - b4) / (b8 - 6 * b4 - 7.5 * b2 + 1))
    df['sr'] = simple_ratio
    df['msr'] = (b8 / (b4 - 1)) / (np.sqrt(simple_ratio) + 1)
    df['savi'] = (1 + 1) * (b8 - b4) / (b8 + b4)
    # sign(ndvi + 0.5) * sqrt(|ndvi + 0.5|); yields NaN where ndvi == -0.5.
    df['ctvi'] = ndvi_shifted / ndvi_shifted.abs() * np.sqrt(ndvi_shifted.abs())
    df['ttvi'] = np.sqrt(ndvi_shifted.abs())
    df['rvi'] = inverse_ratio
    df['nrvi'] = (inverse_ratio - 1) / (inverse_ratio + 1)
    df['ipvi'] = b8 / (b8 + b4)
    df['osavi'] = (b8 - b4) / (b8 + b4 + 0.16)
    # Unlike ttvi there is no abs() here, so ndvi < -0.5 gives NaN.
    df['tndvi'] = np.sqrt(ndvi_shifted)
    df['grvi'] = (b3 - b4) / (b3 + b4)
    df['arvi'] = (b8 - arvi_term) / (b8 + arvi_term)
    return df

In [None]:
# Features used to train model
# These are the column names handed to rf.predict() further down.  The
# lower-case index names (ndvi, evi, ...) are columns added by
# set_index_variables(); the B*_ statistics must already be in the input data.
selected_features = ['B8_max', 'grvi', 'B2_mean', 'B3_mean', 'B8_min', 'B4_std', 'B3_max', 'B8_mean',
                     'gndvi', 'B8_std', 'B2_max', 'B4_min', 'B2_min', 'B4_mean', 'B3_min', 'B2_std',
                     'B4_max', 'msr', 'ctvi', 'rvi', 'osavi', 'sr', 'ndvi', 'nrvi', 'ipvi', 'ttvi', 
                     'savi', 'tndvi', 'evi', 'B3_std']

In [None]:
# Load model
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code stored in the file -- only load model files you trust.
saved_model_path = './model/rf-final.joblib'
rf = joblib.load(saved_model_path)

In [None]:
# Find test data
test_data_dir = './data/test'
test_samples = sorted(Path(test_data_dir).glob('Hila_*.csv'))
# Find train data (needed for the last plot only)
train_data_dir = './data/train'
train_samples = sorted(Path(train_data_dir).glob('Hila_*.csv'))

In [None]:
# Read and transform test data
# One frame per test CSV, each tagged with the map-cell id taken from the
# second '_'-separated token of the file name.
test_frames = [
    pd.read_csv(path, index_col=0).assign(cell_id=path.stem.split('_')[1])
    for path in test_samples
]
df = pd.concat(test_frames).reset_index(drop=True)
# Rescale the biomass column.
# NOTE(review): presumably converts a per-area density into tonnes per
# sample (the plots label this column "tonnes") -- confirm the constants.
df['agbm'] = 0.0256 * df['agbm'] / 1000
df = set_index_variables(df)
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0)

In [None]:
# Predict biomass
# The result has one prediction per row of df, in df's row order.
agbm_pred = rf.predict(df[selected_features])

In [None]:
# Error metrics of the predictions against the actual biomass values.
mae = metrics.mean_absolute_error(df['agbm'], agbm_pred)
mse = metrics.mean_squared_error(df['agbm'], agbm_pred)
print('Test Mean Absolute Error:', mae)
print('Test Mean Squared Error:', mse)
print('Test Root Mean Squared Error:', np.sqrt(mse))

In [None]:
# Transform data into GeoDataFrame
# Each `coords` value is the text of a nested Python literal; the first
# element of that literal is used as the polygon's coordinate sequence.
df['geometry'] = df.coords.apply(lambda x: Polygon(ast.literal_eval(x)[0][:]))
# Built as EPSG:4326 and then reprojected to EPSG:3857.
gdf = gpd.GeoDataFrame(df[['geometry', 'agbm', 'cell_id']], crs='epsg:4326').to_crs(crs=3857)
# The predictions are stored in `agbm_pred` (the name `test_agbm_pred` was
# never defined and raised a NameError).  It is an array in df's row order,
# so it is assigned positionally.
gdf['agbm_prediction'] = agbm_pred

In [None]:
# Compare statistics (over all test map cells)
summary_percentiles = [0.25, 0.5, 0.75, 0.90, 0.95, 0.99]
display(gdf.describe(percentiles=summary_percentiles))

## Histogram

In [None]:
def plot_histogram(df, xlim, out_file=None, ymid=8000):
    """Overlay histograms of actual and predicted biomass on a new figure.

    Args:
        df: Table with the columns ``agbm`` (actual) and ``agbm_prediction``.
        xlim: Upper edge of the histogram range.  It must be an integer
            because it is also used to build the x ticks with ``range``.
            A dashed vertical line labelled '99%' is drawn at this value, so
            callers are expected to pass the 99th percentile of ``agbm``.
        out_file: Optional path; when given, the figure is saved there.
        ymid: Height (in sample counts) up to which the dashed marker line
            is drawn; the '99%' label sits 250 counts above it.  Defaults to
            8000, the value that used to be hard-coded, so it can now be
            lowered for map cells with few samples.
    """
    plt.figure(figsize=(14, 10))

    # Both series share the same bins so the bars line up.
    plt.hist(df.agbm, bins=50, range=(0, xlim),
             alpha=0.5, label='Actual')
    plt.hist(df.agbm_prediction, bins=50, range=(0, xlim),
             alpha=0.5, label='Predicted')
    plt.xticks(list(range(0, xlim + 1)))

    plt.xlabel('Above Ground Biomass (tonnes)', size=14)
    plt.ylabel('Number of Samples', size=14)
    plt.legend(loc='upper right')

    # Mark the right-hand edge of the plotted range.
    plt.vlines(x=xlim, ymin=0, ymax=ymid, color='black', lw=1, ls='--')
    plt.text(xlim - 0.1, ymid + 250, '99%', verticalalignment='center', color='black')

    if out_file:
        plt.savefig(out_file, bbox_inches='tight')

In [None]:
# Per map cell: print biomass totals, show summary statistics and draw the
# histogram, cut off at the (rounded) 99th percentile of the actual values.
cell_percentiles = [.25, .5, .75, .90, .95, .99]
for cell_id in gdf.cell_id.unique():
    cell_rows = gdf[gdf.cell_id == cell_id]
    print(cell_id)
    print(f'Total AGB: {round(cell_rows.agbm.sum())}')
    print(f'Total Predicted AGB: {round(cell_rows.agbm_prediction.sum())}')
    cell_stats = cell_rows.describe(percentiles=cell_percentiles)
    display(cell_stats)
    # To save the figure, pass e.g. out_file=f'./model/histogram-{cell_id}.png'
    plot_histogram(cell_rows, round(cell_stats.loc['99%'].agbm), out_file=None)

## Choropleth

In [None]:
def plot_choropleth(df, vmax, out_file=None, cmap='BuPu'):
    """Draw actual and predicted biomass as two side-by-side choropleths.

    Args:
        df: GeoDataFrame with the columns ``agbm`` and ``agbm_prediction``.
        vmax: Upper end of the colour scale (the lower end is fixed at 0);
            both panels use the same scale.
        out_file: Optional path; when given, the figure is saved there.
        cmap: Colormap used for both panels.
    """
    fig, axes = plt.subplots(1, 2, figsize=(40, 40),
                             sharex=True, sharey=True, constrained_layout=True)

    panels = (('agbm', 'Actual'), ('agbm_prediction', 'Predicted'))
    for panel_ax, (column, title) in zip(axes, panels):
        df.plot(column, ax=panel_ax, cmap=cmap, vmin=0, vmax=vmax)
        # Satellite imagery tiles as the background of the panel.
        cx.add_basemap(panel_ax, source=cx.providers.Esri.WorldImagery)
        panel_ax.set_title(title, fontdict={'fontsize': 45}, pad=15)
        panel_ax.set_axis_off()

    # One colour bar for both panels, taken from the first panel's polygons.
    cbar = fig.colorbar(axes[0].collections[0], ax=list(axes),
                        orientation='horizontal', pad=0.01)
    cbar.set_label(label='Above Ground Biomass (tonnes)', size=30)
    cbar.ax.tick_params(labelsize=25)

    if out_file:
        plt.savefig(out_file, bbox_inches='tight')

In [None]:
# One choropleth pair per map cell; the colour scale tops out at the
# (rounded) 99th percentile of that cell's actual biomass.
for cell_id in gdf.cell_id.unique():
    print(cell_id)
    cell_rows = gdf[gdf.cell_id == cell_id]
    cell_stats = cell_rows.describe(percentiles=[.25, .5, .75, .90, .95, .99])
    colour_max = round(cell_stats.loc['99%'].agbm)
    # To save the figure, pass e.g. out_file=f'./model/choropleth-{cell_id}.png'
    plot_choropleth(cell_rows, colour_max, out_file=None)

## Background of Choropleth

In [None]:
def plot_basemap(df, out_file=None):
    """Draw only the satellite basemap covering the extent of ``df``.

    The geometries are plotted fully transparent, so they set the extent of
    the map without being visible.

    Args:
        df: GeoDataFrame whose geometries define the area to show.
        out_file: Optional path; when given, the figure is saved there.
    """
    basemap_ax = df.plot(figsize=(40, 40), alpha=0)
    cx.add_basemap(basemap_ax, source=cx.providers.Esri.WorldImagery)
    basemap_ax.set_axis_off()
    if not out_file:
        return
    plt.savefig(out_file, bbox_inches='tight')

In [None]:
# Plain basemap for every map cell.
for cell_id in gdf.cell_id.unique():
    print(cell_id)
    cell_rows = gdf[gdf.cell_id == cell_id]
    # To save the figure, pass e.g. out_file=f'model/basemap-{cell_id}.png'
    plot_basemap(cell_rows, out_file=None)

## Map Cells

In [None]:
def plot_sample_map(df, out_file=None):
    """Plot the map cells on an outline of Finland, coloured by their use.

    Args:
        df: GeoDataFrame with the columns ``use`` (drives the colour and the
            legend) and ``cell_id`` (text label placed next to each cell).
        out_file: Optional path; when given, the figure is saved there.

    NOTE(review): ``gpd.datasets`` was deprecated in GeoPandas 0.14 and
    removed in 1.0, so this function needs GeoPandas < 1.0 as written.
    """
    countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
    finland = countries[countries.name == 'Finland']
    ax = finland.boundary.plot(color='black', figsize=(20, 20))

    use_colours = colors.ListedColormap(['green', 'purple'])
    df.plot(ax=ax, column='use', cmap=use_colours, legend=True)

    # Label every cell at its centroid.
    centroids = df.geometry.centroid
    for point, label in zip(centroids, df.cell_id):
        offset = (8, -4)
        if label == 'M4143E':
            # This one label is placed to the left of its cell instead.
            offset = (-68, -4)
        ax.annotate(label, xy=(point.x, point.y), xytext=offset,
                    textcoords='offset points', size=14, color='black')
    ax.set_axis_off()

    if out_file:
        plt.savefig(out_file, bbox_inches='tight')

In [None]:
def read_sample_as_gdf(path, dissolve=False):
    """Read one sample CSV into a GeoDataFrame (EPSG:4326).

    Args:
        path: Path of the CSV file.  The file needs a ``coords`` column
            holding the text of a nested Python literal whose first element
            is the polygon's coordinate sequence.  The map-cell id is the
            second '_'-separated token of the file name.
        dissolve: When true, merge all rows into a single row whose geometry
            is the union of all polygons and whose numeric columns hold the
            column means.  Non-numeric columns (such as ``coords``) are
            dropped in that case.

    Returns:
        GeoDataFrame with a ``geometry`` column and a ``cell_id`` column.
    """
    path = Path(path)
    tmp_df = pd.read_csv(path, index_col=0)
    tmp_df['geometry'] = tmp_df.coords.apply(lambda x: Polygon(ast.literal_eval(x)[0][:]))
    tmp_df = gpd.GeoDataFrame(tmp_df, crs='epsg:4326')
    if dissolve:
        # A mean over text columns (e.g. `coords`) raises TypeError on
        # pandas >= 2.0; older versions silently dropped such columns.
        # Restricting to numeric columns gives the old result on both.
        keep = [col for col in tmp_df.columns
                if col == 'geometry' or pd.api.types.is_numeric_dtype(tmp_df[col])]
        tmp_df = tmp_df[keep].dissolve(aggfunc='mean')
    tmp_df['cell_id'] = path.stem.split('_')[1]
    return tmp_df

In [None]:
# (path, use) pairs: all training files first, then all test files.
all_samples = (
    [(sample_path, 'Train') for sample_path in train_samples]
    + [(sample_path, 'Test') for sample_path in test_samples]
)

In [None]:
# Build one row per map cell: its dissolved outline, its id and whether it
# was used for training or testing.
cell_frames = []
for path, use in all_samples:
    tmp_gdf = read_sample_as_gdf(path, dissolve=True)[['geometry', 'cell_id']]
    tmp_gdf['use'] = use
    cell_frames.append(tmp_gdf)
cell_gdf = gpd.GeoDataFrame(pd.concat(cell_frames, ignore_index=True), crs='epsg:4326')
# The original per-frame `tmp_gdf['use'].astype('category')` threw its result
# away.  Convert once here instead: concatenating single-category columns
# would fall back to object dtype anyway.
cell_gdf['use'] = cell_gdf['use'].astype('category')

In [None]:
# Overview map of all train and test cells.
# To save the figure, pass out_file='./model/all_samples.png' instead of None.
# out_file = './model/all_samples.png'
plot_sample_map(cell_gdf, out_file=None)