# Overall comparison of the models' performance (New Metrics)

## Importing

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import dill
import os


## Printing 1

In [None]:
def printing(name,data,categories):
    """Print a title and display a metric as a table.

    ``data`` is transposed before tabulation, so the table has one row per
    entry of ``categories`` and one column per row of ``data``. Columns are
    numbered starting from 1.

    Parameters
    ----------
    name : str
        Title printed above the table.
    data : numpy.ndarray
        2-D array whose second axis lines up with ``categories``.
    categories : sequence
        Row labels of the table.
    """
    # Derive the column labels from the data instead of assuming exactly
    # six rows; a fixed 1..6 range makes the DataFrame constructor raise
    # for any other size.
    columns = np.arange(1, data.shape[0] + 1)

    temp = pd.DataFrame(data.transpose(), columns=columns, index=categories)
    print(name)
    # `display` is the rich-output built-in supplied by IPython/Jupyter
    display(temp)
     

## Printing 2 (Annually)

In [None]:
def printing2(j,name,targets,predictions,years,categories):
    """Print a caption and display predictions next to their targets.

    The table holds one row per entry of ``categories`` (taken from the
    transposed ``predictions``) followed by a final row labelled
    ``'targets'``; its columns are labelled with ``years``.

    Parameters
    ----------
    j : int
        Cluster identifier inserted verbatim into the caption.
    name : str
        Leading part of the caption.
    targets : numpy.ndarray
        Values written to the ``'targets'`` row.
    predictions : numpy.ndarray
        2-D array; its transpose provides the category rows.
    years : sequence
        Column labels.
    categories : sequence
        Row labels for the prediction rows.
    """
    caption = name + ' (Cluster ' + str(j) + ')'

    table = pd.DataFrame(predictions.T, index=categories, columns=years)
    # Append the reference values as the last row
    table.loc['targets'] = targets.T

    print(caption)
    # `display` is the rich-output built-in supplied by IPython/Jupyter
    display(table)
     

## Plotting 1

In [None]:
def plotting (name,quantity,categories):
    """Draw one line per category across the rows of ``quantity``.

    Parameters
    ----------
    name : str
        Figure title.
    quantity : numpy.ndarray
        2-D array; column ``k`` is plotted with the label ``categories[k]``.
    categories : sequence of str
        Legend labels, one per plotted column.
    """
    n_rows = len(quantity)

    for column, label in enumerate(categories):
        plt.plot(quantity[:, column], marker='*', label=label)

    # Rows are plotted at positions 0..n-1 but labelled 1..n
    plt.xticks(ticks=np.arange(0, n_rows), labels=np.arange(1, n_rows + 1))
    plt.xlabel('Clusters')
    plt.suptitle(name)
    plt.legend()
    plt.show()


## Plotting 2 (Annually)

In [None]:
def plotting2(name,targets,predictions,years,categories):
    """Tabulate and plot a yearly quantity for every cluster.

    For each index ``j`` along the first axis of ``predictions`` a table is
    shown through ``printing2`` and a figure is drawn with the targets in
    black and one line per category.

    Parameters
    ----------
    name : str
        Name of the quantity; used in both the table caption and the figure
        title.
    targets : numpy.ndarray
        Indexed as ``targets[j, :, 0]``, i.e. only the first entry of the
        last axis is used.
    predictions : numpy.ndarray
        Indexed as ``predictions[j]``; plotted against ``years``.
    years : sequence
        X-axis values and table column labels.
    categories : sequence of str
        Legend labels for the prediction lines and table row labels.
    """
    for j in range(len(predictions)):

        # Clusters are numbered from 1 everywhere else in this notebook
        # (tables, x-tick labels, mean-value figures), so do the same here.
        cluster = j + 1
        cluster_targets = targets[j,:,0]

        # The caption follows `name`; it used to be fixed to 'Summation',
        # which mislabelled the table of every other quantity.
        printing2(cluster,name,cluster_targets,predictions[j],years,categories)

        plt.plot(years,cluster_targets,marker = '*', label='targets',linewidth=1.5,color='k')
        plt.plot(years,predictions[j],marker= '*', label=categories)
        plt.xlabel('Years')
        plt.suptitle(name+ ' (Cluster '+str(cluster) + ')')
        plt.legend()
        plt.show()


## Plotting Mean Values

In [None]:
def plotting_mean_values(ticks,years,targets,predictions,categories,):
    """Plot the target series against every category's predictions, per cluster.

    One wide figure is produced for each index along the last axis of
    ``targets``.

    Parameters
    ----------
    ticks : sequence of int
        Row positions that are masked in both series and also used as the
        x-tick locations.
    years : sequence
        X-tick labels, one per entry of ``ticks``.
    targets : numpy.ndarray
        Indexed as ``targets[:, cluster]``.
    predictions : numpy.ndarray
        Indexed as ``predictions[:, cluster, category]``.
    categories : sequence of str
        Legend labels for the prediction lines.
    """
    targets_masked = np.ma.array(targets)
    predictions_masked = np.ma.array(predictions)

    # Masked samples are skipped by matplotlib, so the curves are interrupted
    # at every tick position. A single pass is enough: the previous loop over
    # `years[:-1]` repeated this exact assignment. The guard keeps the former
    # behaviour of masking nothing when only one year is given.
    if len(years) > 1:
        targets_masked[ticks] = np.ma.masked
        predictions_masked[ticks] = np.ma.masked

    for i in range(targets.shape[-1]):

        plt.subplots(figsize=(19,5))

        plt.plot(targets_masked[:,i], label = 'targets', linewidth=1.5,color='k')

        for j, category in enumerate(categories):
            plt.plot(predictions_masked[:,i,j], label = category)

        # Axis decoration does not depend on the category: apply it once per
        # figure, after all lines exist so the legend lists every one of them.
        plt.xlabel('Years')
        plt.xticks(ticks,years)
        plt.suptitle('Mean Concentrations [mmol m-2] (Cluster '+str(i+1)+ ')')
        plt.legend()

        plt.show()


## Initialization

In [None]:
# Paths
diat = '/data/ibougoudis/MOAD/files/results/Diatom/'
flag = '/data/ibougoudis/MOAD/files/results/Flagellate/'

years = np.array([2021, 2022, 2023, 2024])

# Results folder analysed by the rest of the notebook
variable = diat

# List the result sub-folders that exist for the selected variable
categories0 = os.listdir(variable)
print(categories0)

# Sub-folders (models) that are compared below
categories = ['func_reg_cl_target','func_reg_cl_target_s']

# Only to obtain dimensions - number of clusters
# NOTE(review): dill.load executes arbitrary code; only open trusted result files.
with open(variable+categories[0] + '/targets-predictions.pkl', 'rb') as f:
    metrics = dill.load(f)

temp = metrics[0]

# Sizes shared by the containers allocated below
n_categories = len(categories)
n_years = len(years)
n_clusters = temp.shape[2]

per_cluster = (n_clusters, n_categories)
per_cluster_year = (n_clusters, n_years, n_categories)
per_cluster_sample = (n_clusters, temp.shape[0] * temp.shape[1], n_categories)

# Training metrics
r_train = np.zeros(per_cluster)
rms_train = np.zeros(per_cluster)
slope_train = np.zeros(per_cluster)
r_train_season = np.zeros(per_cluster)
slope_train_season = np.zeros(per_cluster)

# Testing metrics
r_test = np.zeros(per_cluster)
rms_test = np.zeros(per_cluster)
slope_test = np.zeros(per_cluster)

r_test_season = np.zeros(per_cluster)
slope_test_season = np.zeros(per_cluster)

# Yearly aggregates
targets_sum = np.zeros(per_cluster_year)
predictions_sum = np.zeros(per_cluster_year)

targets_mean = np.zeros(per_cluster_year)
predictions_mean = np.zeros(per_cluster_year)

# One value per sample of the flattened first two axes of `temp`
targets_diff = np.zeros(per_cluster_sample)
predictions_diff = np.zeros(per_cluster_sample)

rss = np.zeros(per_cluster)

# Same layout as `temp`, with a trailing axis for the categories
predictions = np.zeros(tuple(temp.shape[:3]) + (n_categories,))


## Variable Loading

In [None]:
# Fill column `i` of every container with the results stored for category `i`
for i, category in enumerate(categories):

    folder = variable + category

    with open(folder + '/train_metrics.pkl', 'rb') as f:
        metrics = dill.load(f)

    # (destination array, position inside the pickled training metrics)
    for dest, k in (
        (r_train, 0),
        (rms_train, 1),
        (slope_train, 2),
        (r_train_season, 3),
        (slope_train_season, 4),
    ):
        dest[:, i] = metrics[k]

    # Re-assigned on every pass, so the last category's value is the one kept
    season = metrics[5]

    with open(folder + '/test_metrics.pkl', 'rb') as f:
        metrics = dill.load(f)

    for dest, k in (
        (r_test, 0),
        (rms_test, 1),
        (slope_test, 2),
        (r_test_season, 3),
        (slope_test_season, 4),
    ):
        dest[:, i] = metrics[k]

    # Entries 5-10 fill the three-dimensional containers
    for dest, k in (
        (targets_sum, 5),
        (predictions_sum, 6),
        (targets_mean, 7),
        (predictions_mean, 8),
        (targets_diff, 9),
        (predictions_diff, 10),
    ):
        dest[:, :, i] = metrics[k]

    rss[:, i] = metrics[11]

    with open(folder + '/targets-predictions.pkl', 'rb') as f:
        metrics = dill.load(f)

    # Like `season`, `targets` ends up holding the last category's copy
    targets = metrics[0]
    predictions[:, :, :, i] = metrics[1]


## Time-series Calculations

In [None]:
# Subtract `season` from the targets and from every category's predictions,
# one slice of the second axis at a time.
season_t = season.transpose()

targets_season = np.zeros(targets.shape)
predictions_season = np.zeros(predictions.shape)

for i in range(targets.shape[1]):
    targets_season[:, i, :] = targets[:, i, :] - season_t

    for j in range(len(categories)):
        predictions_season[:, i, :, j] = predictions[:, i, :, j] - season_t

# Row at which each slice of the second axis starts once axes 0 and 1 are merged
n_samples = targets.shape[0] * targets.shape[1]
ticks = list(range(0, n_samples, targets.shape[0]))

# Merge the first two axes; order='F' makes the first axis vary fastest
targets_new = np.reshape(targets, (n_samples, targets.shape[-1]), order='F')
targets_season_new = np.reshape(
    targets_season,
    (targets_season.shape[0] * targets_season.shape[1], targets_season.shape[-1]),
    order='F',
)

stacked_shape = (targets_new.shape[0], targets_new.shape[1], predictions.shape[-1])
predictions_new = np.zeros(stacked_shape)
predictions_season_new = np.zeros(stacked_shape)

flat_pred_shape = (predictions.shape[0] * predictions.shape[1], predictions.shape[2])
flat_pred_season_shape = (
    predictions_season.shape[0] * predictions_season.shape[1],
    predictions_season.shape[2],
)

for j in range(len(categories)):
    predictions_new[:, :, j] = np.reshape(
        predictions[:, :, :, j], flat_pred_shape, order='F')
    predictions_season_new[:, :, j] = np.reshape(
        predictions_season[:, :, :, j], flat_pred_season_shape, order='F')


## Training Metrics

In [None]:
# Each training metric is shown first as a table and then as a line plot
training_metrics = (
    ('Correlation Coefficient (Training)', r_train),
    ('Root Mean Square Error (Training)', rms_train),
    ('Slope of the Best Fitting Line (Training)', slope_train),
    ('Correlation Coefficient (Training, no Seasonality)', r_train_season),
    ('Slope of the Best Fitting Line (Training, no Seasonality)', slope_train_season),
)

for title, values in training_metrics:
    printing(title, values, categories)
    plotting(title, values, categories)


## Testing Metrics

In [None]:
# Each testing metric is shown first as a table and then as a line plot
testing_metrics = (
    ('Correlation Coefficient (Testing)', r_test),
    ('Root Mean Square Error (Testing)', rms_test),
    ('Slope of the Best Fitting Line (Testing)', slope_test),
    ('Correlation Coefficient (Testing, no Seasonality)', r_test_season),
    ('Slope of the Best Fitting Line (Testing, no Seasonality)', slope_test_season),
    ('Sum of Squared Residuals (Testing)', rss),
)

for title, values in testing_metrics:
    printing(title, values, categories)
    plotting(title, values, categories)


## Annual Metrics

In [None]:
# Yearly sums and means: one table and one figure per cluster
annual_metrics = (
    ('Summation', targets_sum, predictions_sum),
    ('Mean', targets_mean, predictions_mean),
)

for title, yearly_targets, yearly_predictions in annual_metrics:
    plotting2(title, yearly_targets, yearly_predictions, years, categories)

# Full series with `season` subtracted, one figure per cluster
plotting_mean_values(
    ticks=ticks,
    years=years,
    targets=targets_season_new,
    predictions=predictions_season_new,
    categories=categories,
)
