# Overall comparison of the models' performance (spatial means) (Flagellate)

## Importing

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xskillscore as xs
from sklearn.metrics import root_mean_squared_error as rmse
import dill


## Plotting

In [2]:
def plotting(metric, categories, years, quantity, name):
    """Plot one line per category showing a quantity against the years.

    Parameters
    ----------
    metric : str
        Leading part of the figure title.
    categories : sequence of str
        Legend labels; entry ``i`` labels the line drawn from
        ``quantity[i, :]``.
    years : sequence
        Values placed on the x axis.
    quantity : array
        Indexed as ``quantity[i, :]``, one row per category.
    name : str
        Second part of the figure title, placed after ``metric``.
    """
    fig, ax = plt.subplots()

    # One starred line per category, labelled for the legend.
    for row, label in enumerate(categories):
        ax.plot(years, quantity[row, :], marker='*', label=label)

    plt.xlabel('Years')
    title = metric + ' ' + name + ' (15 Feb - 30 Apr)'
    plt.suptitle(title)
    plt.legend()
    plt.show()

## Printing

In [3]:
def printing(metric, years, data, categories, criteria):
    """Print a heading and display ``data`` as a labelled table.

    The table is a ``pandas.DataFrame`` built from ``data`` with
    ``categories`` as the row index and ``years`` as the column labels.
    The heading is ``metric`` and ``criteria`` separated by a space.

    Relies on ``display`` being available in the global namespace
    (as it is inside an IPython/Jupyter session).
    """
    table = pd.DataFrame(data, index=categories, columns=years)
    heading = metric + ' ' + criteria
    print(heading)
    display(table)
     

## Initiation

In [4]:
def initiation(id, years, categories):
    """Allocate zero-filled arrays for the three metrics r, rms and slope.

    Parameters
    ----------
    id : int
        ``0`` requests 1-D arrays of length ``len(categories)``; any other
        value requests 2-D arrays of shape
        ``(len(categories), len(years))``.
    years : sequence
        Only its length is used, and only when ``id`` is not ``0``.
    categories : sequence
        Only its length is used.

    Returns
    -------
    tuple
        ``(r, rms, slope)`` -- three independent zero arrays of the same
        shape.
    """
    n_models = len(categories)

    # First axis is the model categories, second (when present) the years.
    shape = (n_models,) if id == 0 else (n_models, len(years))

    r, rms, slope = (np.zeros(shape) for _ in range(3))
    return (r, rms, slope)


## Summary

In [5]:
def summary(years, quantity, categories):
    """Load the saved metrics of every model category and report them.

    For each entry of ``categories`` the file
    ``<quantity>/<category>/metrics.pkl`` is loaded with ``dill``. The
    loaded object is indexed as a sequence of at least nine items:

    * ``metrics[0:3]`` -- r, rms and slope stored as one value per category
      (shown in the 'Training' table);
    * ``metrics[3:6]`` -- r, rms and slope stored per year
      ('Spatial means');
    * ``metrics[6:9]`` -- r, rms and slope stored per year
      ('Spatial means (removed seasonality)').

    The training values are displayed as a single table; every per-year
    quantity is displayed as a table and then plotted.

    Parameters
    ----------
    years : sequence
        Column labels of the per-year tables and x values of the plots.
    quantity : str
        Path of the results directory holding one sub-directory per
        category. A trailing separator is optional.
    categories : sequence of str
        Names of the sub-directories (one per model) to load.
    """
    import os  # local import: only needed for the path handling below

    # Print the name of the results directory (its last path component)
    # instead of slicing at a hard-coded character offset, which only
    # worked for one specific directory prefix with a trailing slash.
    print(os.path.basename(os.path.normpath(quantity)))
    print('\n')

    r_train, rms_train, slope_train = initiation(0, years, categories)  # 0 for training, 1 for testing
    r_test, rms_test, slope_test = initiation(1, years, categories)
    r_test_season, rms_test_season, slope_test_season = initiation(1, years, categories)

    for i, category in enumerate(categories):

        # NOTE: dill.load can execute arbitrary code while unpickling;
        # only point this at metrics files you produced yourself.
        with open(os.path.join(quantity, category, 'metrics.pkl'), 'rb') as f:
            metrics = dill.load(f)

        r_train[i], rms_train[i], slope_train[i] = metrics[0:3]
        r_test[i, :], rms_test[i, :], slope_test[i, :] = metrics[3:6]
        r_test_season[i, :], rms_test_season[i, :], slope_test_season[i, :] = metrics[6:9]

    # One row per category, one column per training metric.
    training = pd.DataFrame(
        np.column_stack((r_train, rms_train, slope_train)),
        columns=['r', 'rms', 'slope'],
        index=categories,
    )
    print('Training')
    display(training)
    print('\n')

    # (table caption, plot-title suffix) for r, rms and slope, in that order.
    labels = (
        ('Correlation coefficient', 'correlation coefficient'),
        ('Root mean square error', 'root mean square error'),
        ('Slope of the best fitting line', 'slope of the best fitting line'),
    )
    reports = (
        ('Spatial means', (r_test, rms_test, slope_test)),
        ('Spatial means (removed seasonality)',
         (r_test_season, rms_test_season, slope_test_season)),
    )

    # Table first, then the matching plot, for every quantity of every group.
    for metric, arrays in reports:
        for data, (criteria, name) in zip(arrays, labels):
            printing(metric, years, data, categories, criteria)
            plotting(metric, categories, years, data, name)
    

## Main Body

In [None]:
import warnings

# `np.warnings` was only an accidental alias of the standard-library
# `warnings` module and is missing from newer NumPy releases, and
# `VisibleDeprecationWarning` now lives in `numpy.exceptions`. Use the
# standard module directly and fall back to the old location of the
# warning class when `numpy.exceptions` is not available.
try:
    from numpy.exceptions import VisibleDeprecationWarning
except ImportError:
    VisibleDeprecationWarning = np.VisibleDeprecationWarning

warnings.filterwarnings('ignore', category=VisibleDeprecationWarning)

# Paths of the results directories, one sub-directory per model category
diat = '/data/ibougoudis/MOAD/files/results/Diatom/'
flag = '/data/ibougoudis/MOAD/files/results/Flagellate/'

# Model categories to compare (sub-directory names under the results path)
categories = ['func_reg_s', 'func_reg_s_sm']

# Labels of the per-year columns / x axis
years = ['2021','2022','2023','2024']

# NOTE(review): the notebook title mentions Flagellate, but the Diatom
# path is the one passed here -- confirm which dataset is intended.
summary(years,diat,categories)
