## Analyze AutoXval Results
Combine multiple csv files from an AutoXval run on Eagle and plot GHI and DNI

In [None]:
from mlclouds.autoxval import XVal, AutoXVal, CONFIG
from rex.utilities.loggers import init_logger
import pandas as pd
import glob
import matplotlib.pyplot as plt

# Apply matplotlib's 'ggplot' style sheet to the figures drawn below
plt.style.use('ggplot')

In [None]:
# Glob pattern (not a directory) selecting the statistics csv files to load;
# as written it matches every csv file in the current working directory
csv_dir = '*.csv'

In [None]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort
# them so the row order of the combined table is reproducible between runs.
files = sorted(glob.glob(csv_dir))
if not files:
    # Fail early with a clear message instead of the cryptic
    # "No objects to concatenate" error raised by pd.concat on empty input.
    raise FileNotFoundError(f'No csv files found matching {csv_dir!r}')
print('Loading', files)
# Each file is read with its first column as the index and its 'index'
# column dropped before the tables are stacked into one.
dfs = (pd.read_csv(file, index_col=0).drop('index', axis=1) for file in files)
stats = pd.concat(dfs).reset_index()
display(stats)

In [None]:
# Read the surfrad metadata csv, name its index 'gid' and keep only the
# 'surfrad_id' column (still as a one-column DataFrame).
fp = '/projects/mlclouds/ground_measurement/surfrad_meta.csv'
surf_meta = pd.read_csv(fp, index_col=0).rename_axis('gid')[['surfrad_id']]
surf_meta

In [None]:
def plot_stats(var, sites=(0, 1, 2, 3, 4, 5, 6)):
    """Plot Baseline and PhyGNN MAE versus # of sites used in training.

    One figure is drawn per validation site from the rows of the global
    ``stats`` table whose ``Condition`` is 'Missing Cloud Data'. Site names
    are looked up in the global ``surf_meta`` table. Rows are ordered by
    ``num_ts`` before plotting so the curves run left to right and the
    "Final" annotation refers to the largest ``num_ts``.

    Parameters
    ----------
    var : str
        Variable to plot, either 'GHI' or 'DNI'.
    sites : iterable of int, optional
        Validation site ids to plot; each must be a label of the
        ``surf_meta`` index. Defaults to sites 0 through 6.

    Raises
    ------
    AttributeError
        If ``var`` is neither 'GHI' nor 'DNI'.
    """
    # Per-variable y-axis limits and y position of the "Final" annotation
    if var == 'GHI':
        ylim = (25, 60)
        ytxt = 30
    elif var == 'DNI':
        ylim = (50, 170)
        ytxt = 60
    else:
        raise AttributeError('{} is not allowed'.format(var))

    for site in sites:
        # Looked up once per site; reused for filtering, labels and messages
        site_name = surf_meta.surfrad_id[site].upper()

        mask = ((stats.val_site == site) & (stats.Site == site_name)
                & (stats.Variable == var)
                & (stats.Condition == 'Missing Cloud Data'))
        # The combined table's row order depends on the order the csv files
        # were loaded in, so sort by the x-axis value. A stable sort keeps
        # the original relative order of rows with equal num_ts.
        s = stats[mask].sort_values('num_ts', kind='mergesort')
        stats_phygnn = s[s.Model == 'PhyGNN']
        stats_base = s[s.Model == 'Baseline']
        phygnn_mae = stats_phygnn['MAE (%)']

        plt.figure()
        plt.plot(stats_base.num_ts, stats_base['MAE (%)'],
                 label=f'{site} {site_name} Baseline')
        plt.plot(stats_phygnn.num_ts, phygnn_mae, marker='x',
                 label=f'{site} {site_name} PhyGNN')

        plt.xlabel('Number of training sites')
        plt.ylabel(f'{var} MAE (%)')
        plt.ylim(ylim)
        plt.title(f'Cross validating PhyGNN - {site_name}')

        if len(phygnn_mae) == 1:
            # A single point gives no visible curve, so report it as text
            print('{} MAE (%) for {} is {}'
                  ''.format(var, site_name,
                            round(phygnn_mae.values[0], 1)))
        elif len(phygnn_mae) == 0:
            print('Training for {} appeared to have a loss of nan'
                  ''.format(site_name))
        else:
            # After sorting, the last row is the one with the largest num_ts
            plt.text(1, ytxt, 'Final PGNN={}%'.format(
                     round(phygnn_mae.iloc[-1], 1)))

        plt.legend()
        plt.show()
        
# Draw the per-site figures for GHI first, then for DNI
for irradiance_var in ('GHI', 'DNI'):
    plot_stats(irradiance_var)