# Evaluation of SOLFSMY

> With respect to the benchmark by Licata et al.

In [None]:
# Imports
import sys
sys.path.append('..')
from swdf.benchmark import *
from tsai.basics import *
from sklearn.metrics import mean_squared_error, mean_absolute_error
from swdf.utils import *
import wandb
wandb_api = wandb.Api()

In [None]:
# Constants
# Directory (under $WANDB_DIR) where the learner artifact is downloaded.
# Reading os.environ[...] raises KeyError when WANDB_DIR is not set.
ARTIFACT_DOWNLOAD_PATH = Path(os.environ["WANDB_DIR"]) / "wandb" / "artifacts" / "solfsmy_eval_tmp"

In [None]:
# Config
# Only the `eval` section of the YAML configuration is used in this notebook.
config = yaml2dict('config/solfsmy.yaml').eval
config

In [None]:
# TODO
# learn.path = Path(path)
# learn.model_dir = Path()

# Default to the local 'tmp' folder; when a W&B artifact is configured,
# download it and load the learner from the downloaded location instead.
learner_path = 'tmp'
if config.learner_artifact is not None:
    learner_artifact = wandb_api.artifact(config.learner_artifact)
    learner_path = learner_artifact.download(root=ARTIFACT_DOWNLOAD_PATH)

learn = load_learner_all(
    learner_path,
    model_fname='model',
    verbose=True,
    device=default_device(),
)

In [None]:
# Print the test loss (first element returned by validate on ds_idx=2)
test_loss = learn.validate(ds_idx=2)[0]
print(f'Test loss: {test_loss}')

In [None]:
# Predictions and targets for the same split used for the test loss (ds_idx=2),
# converted to numpy arrays for the rest of the analysis.
test_preds_t, test_targs_t = learn.get_preds(ds_idx=2, with_targs=True)
y_test_preds, y_test = to_np(test_preds_t), to_np(test_targs_t)
print(f"y_test_preds.shape: {y_test_preds.shape}")

In [None]:
# Number of forecast steps: the last axis of y_test.
# Later cells index y_test as y_test[:, variable_index, :], i.e. they treat it
# as (samples, variables, steps).
horizon = y_test.shape[-1]
# Forecast variables, in the order used to index the variable axis of y_test.
data_columns_fcst = ['F10', 'S10', 'M10', 'Y10']

The evaluation is done separately for each variable and for each solar activity 
level (low, moderate, elevated and high), as in the paper by Licata et al. (2020).
Therefore, the test set has to be split into 4 different sets, one for each solar activity level. The thresholds that separate the solar activity levels are the same as in the paper by Licata et al. (2020).

The thresholds are:
- F10: 75, 150, 190
- S10: 65, 150, 215
- M10: 72, 144, 167
- Y10: 81, 148, 165

In [None]:
#|export

def get_idxs_per_solar_activity_level(data, thresholds):
    """Group sample indices by solar activity level.

    Each sample (row of `data`) is assigned to a level according to the value
    in its first column: level 0 holds values `<= thresholds[0]`, level `k`
    holds values in `(thresholds[k-1], thresholds[k]]`, and the last level
    holds values `> thresholds[-1]`.

    Args:
        data: numpy array with shape (samples, steps).
        thresholds: sequence of upper bounds separating consecutive levels.

    Returns:
        A list with `len(thresholds) + 1` numpy arrays. Each array contains
        the row indices (not the values) of the samples in that level.
    """
    first_step = data[:, 0]
    n_bounds = len(thresholds)

    # One boolean mask per threshold for each side of the boundary.
    at_or_below = [first_step <= bound for bound in thresholds]
    above = [first_step > bound for bound in thresholds]

    # Lowest level, then the bounded intermediate levels, then the open top level.
    level_masks = [at_or_below[0]]
    level_masks += [above[k - 1] & at_or_below[k] for k in range(1, n_bounds)]
    level_masks.append(above[n_bounds - 1])

    return [np.where(mask)[0] for mask in level_masks]

In [None]:
# Per-variable thresholds separating the four solar activity levels
thresholds = {
    'F10': [75, 150, 190],
    'S10': [65, 150, 215],
    'M10': [72, 144, 167],
    'Y10': [81, 148, 165],
}

# For every variable, compute the sample indices of each solar activity level
# from the targets (y_test) of that variable.
y_test_split_idxs = {
    var: get_idxs_per_solar_activity_level(y_test[:, var_pos, :], thresholds[var])
    for var_pos, var in enumerate(data_columns_fcst)
}

# Check how many samples fall in each level, per variable
for var in data_columns_fcst:
    print(f"{var}: {[level_idxs.shape for level_idxs in y_test_split_idxs[var]]}")

In [None]:
# Split y_test and y_test_preds per variable and per solar activity level,
# using numpy.take with the indices computed for that variable.
#
# The variable axis must be indexed with the position of the variable in
# data_columns_fcst. The solar activity level only selects which set of sample
# indices to take; it must never be used as the variable index, otherwise
# level k of every variable would be read from the data of variable k.
y_test_split = {}
y_test_preds_split = {}
for var_idx, var in enumerate(data_columns_fcst):
    idxs_per_level = y_test_split_idxs[var]
    y_test_split[var] = [
        y_test[:, var_idx, :].take(level_idxs, axis=0)
        for level_idxs in idxs_per_level
    ]
    y_test_preds_split[var] = [
        y_test_preds[:, var_idx, :].take(level_idxs, axis=0)
        for level_idxs in idxs_per_level
    ]

# Check the shape of each level, for targets and predictions of each variable
for var in data_columns_fcst:
    print(f"y_test-{var}: {[arr.shape for arr in y_test_split[var]]}")
    print(f"y_test_preds-{var}: {[arr.shape for arr in y_test_preds_split[var]]}")

In [None]:
#| hide

# Smoke test of the error helpers with random torch tensors of shape (10, 3);
# the third argument is the same position later filled by the horizon `h`.
# NOTE(review): the name `foo` is reused further down for the comparison table.
foo = torch.rand(10, 3)
bar = torch.rand(10, 3)
print(forecast_error(foo, bar, 1))
print(percent_forecast_error(foo, bar, 1))

In [None]:
# Build a dataframe with the mean and standard deviation of the forecast error
# (absolute and percent) for each variable (F10, S10, M10, Y10), each solar
# activity level and each horizon (1..horizon), plus the number of samples.
sals = ['low', 'moderate', 'elevated', 'high']
data = []
for var in data_columns_fcst:
    for sal_idx, sal in enumerate(sals):
        # Targets and predictions of this variable at this activity level
        level_targets = y_test_split[var][sal_idx]
        level_preds = y_test_preds_split[var][sal_idx]
        n_samples = level_targets.shape[0]
        for h in range(1, horizon + 1):
            fe_sfu = forecast_error(level_targets, level_preds, h)
            fe_percent = percent_forecast_error(level_targets, level_preds, h)
            data.append([
                var, sal, h,
                np.mean(fe_sfu), np.std(fe_sfu),
                np.mean(fe_percent), np.std(fe_percent),
                n_samples,
            ])

result_columns = ['variable', 'condition', 'horizon',
                  'mean_sfu', 'std_sfu', 'mean_percent',
                  'std_percent', 'n_samples']
df_results = pd.DataFrame(data, columns=result_columns)
df_results.head(10)

In [None]:
# Print one table per variable in the same layout as the table in the paper:
# rows are (condition, statistic) pairs and there is one column per horizon.
# The condition is the solar activity level and the statistics are the mean
# (mean_sfu) and the standard deviation (std_sfu) of the forecast error.
for var in data_columns_fcst:
    is_var = df_results['variable'] == var
    long_stats = df_results[is_var].melt(
        id_vars=['condition', 'horizon'],
        value_vars=['mean_sfu', 'std_sfu'],
        var_name='Statistic',
    )
    # An ordered categorical makes the rows come out as
    # low, moderate, elevated, high instead of alphabetically.
    long_stats['condition'] = pd.Categorical(
        long_stats['condition'],
        categories=['low', 'moderate', 'elevated', 'high'],
        ordered=True,
    )
    df_var = long_stats.pivot_table(
        index=['condition', 'Statistic'],
        columns='horizon',
        values='value',
    )
    # pretty print, and separate with a blank line
    print(f'Distribution Statistics {var} Error Distribution \n{df_var.to_string()}\n')

In [None]:
# Convert df_results into long format: one row per
# (variable, condition, horizon, statistic) with the number in column 'value'.
df_results_lf = df_results.melt(id_vars=['variable', 'condition', 'horizon'],
                                value_vars=['mean_sfu', 'std_sfu', 'mean_percent', 'std_percent'],
                                var_name='statistic')

# Filter out the percent statistics. The explicit copy makes the result an
# independent frame, so the column assignment below does not write into a
# slice of the melted frame (which raises SettingWithCopyWarning).
is_percent = df_results_lf['statistic'].str.contains('percent')
df_results_lf = df_results_lf[~is_percent].copy()

# Rename the statistic 'mean_sfu' to just 'mean' and 'std_sfu' to 'std'
df_results_lf['statistic'] = df_results_lf['statistic'].str.replace('_sfu', '')

df_results_lf.head()

In [None]:
# Load the results reported in the paper, stored as a csv in the data folder,
# so they can be compared with the results computed above.
df_results_paper = pd.read_csv('../data/paper_results.csv')

# Filter out the rows with statistic = 'EBM'. The explicit copy makes the
# result an independent frame, so the column assignments below do not write
# into a slice of the frame read from disk (SettingWithCopyWarning).
df_results_paper = df_results_paper[df_results_paper['statistic'] != 'EBM'].copy()

# Rename the values of the column condition to the ones of the sals variable.
# These are literal substring replacements, hence regex=False.
condition_renames = {
    'Low solar': 'low',
    'Moderate solar': 'moderate',
    'Elevated solar': 'elevated',
    'High solar': 'high',
}
for paper_name, short_name in condition_renames.items():
    df_results_paper['condition'] = df_results_paper['condition'].str.replace(
        paper_name, short_name, regex=False)

df_results_paper.head()

In [None]:
# Convert df_results_paper into long format: the '<n> Days' columns (one per
# horizon from 1 to `horizon`) are stacked into the columns 'horizon' and 'value'
df_results_paper_lf = df_results_paper.melt(id_vars=['variable', 'condition', 'statistic'],
                                            value_vars=[f'{i} Days' for i in range(1, horizon+1)],
                                            var_name='horizon', value_name='value')

# Convert the values of the horizon column to int ('3 Days' -> 3)
df_results_paper_lf['horizon'] = df_results_paper_lf['horizon'].str.replace(' Days', '').astype(int)
df_results_paper_lf.head()

In [None]:
# Join our results (df_results_lf) with the paper results (df_results_paper_lf)
# on the columns variable, condition, statistic and horizon. The left join keeps
# every row of our results; the two 'value' columns are disambiguated with the
# suffixes _ours and _paper.
df_results_joined = df_results_lf.merge(df_results_paper_lf, how='left', 
                                            on=['variable', 'condition', 'statistic', 'horizon'],
                                            suffixes=('_ours', '_paper'))
# Row counts of both inputs and of the joined frame, as a sanity check
len(df_results_lf), len(df_results_paper_lf), len(df_results_joined)

In [None]:
# Reshape the joined table so that it compares the results of our model with the
# results of the paper, and print one LaTeX table per variable. Each printed
# table has the columns: condition | horizon | NN | benchmark
# where condition is the solar activity level, and NN / benchmark hold the
# concatenation of the mean and the standard deviation (with the symbol ±) of
# the forecast error of our model and of the paper, respectively.
# NOTE(review): the tables are in long format (one row per condition and
# horizon), not one column per horizon — confirm this is the intended layout.

# Spread the values of the column statistic: the result has two column levels,
# (value_ours | value_paper) x (mean | std), indexed by variable/condition/horizon.
foo = df_results_joined.pivot_table(index=['variable', 'condition', 'horizon'],
                                                    columns='statistic',
                                                    values=['value_ours', 'value_paper'])


# Build string versions of mean and std, wrapped in \textbf{} when the value is
# smaller in absolute value than the same statistic of the other source (ours
# vs. paper). The string columns are added as new columns (mean_str, std_str)
# and do not replace the numeric ones. Vectorised with np.where instead of
# apply.
foo.loc[:, ('value_ours', 'mean_str')] = np.where(np.abs(foo['value_ours']['mean']) < np.abs(foo['value_paper']['mean']), 
                    '\\textbf{' + foo['value_ours']['mean'].astype(str) + '}',
                    foo['value_ours']['mean'].astype(str))
foo.loc[:, ('value_ours', 'std_str')] = np.where(np.abs(foo['value_ours']['std']) < np.abs(foo['value_paper']['std']),
                    '\\textbf{' + foo['value_ours']['std'].astype(str) + '}',
                    foo['value_ours']['std'].astype(str))
foo.loc[:, ('value_paper', 'mean_str')] = np.where(np.abs(foo['value_paper']['mean']) < np.abs(foo['value_ours']['mean']),
                    '\\textbf{' + foo['value_paper']['mean'].astype(str) + '}',
                    foo['value_paper']['mean'].astype(str))
foo.loc[:, ('value_paper', 'std_str')] = np.where(np.abs(foo['value_paper']['std']) < np.abs(foo['value_ours']['std']),
                    '\\textbf{' + foo['value_paper']['std'].astype(str) + '}',
                    foo['value_paper']['std'].astype(str))

# Drop the numeric columns mean and std, keeping only mean_str and std_str
foo = foo.drop(columns=[('value_ours', 'mean'), ('value_ours', 'std'),
                        ('value_paper', 'mean'), ('value_paper', 'std')])

# Unite the mean and the standard deviation into a single column for both our
# model and the paper, and drop the mean_str and std_str columns
foo.loc[:, ('value_ours', 'mean ± std')] = foo['value_ours']['mean_str'] + ' ± ' + foo['value_ours']['std_str']
foo.loc[:, ('value_paper', 'mean ± std')] = foo['value_paper']['mean_str'] + ' ± ' + foo['value_paper']['std_str']
foo = foo.drop(columns=[('value_ours', 'mean_str'), ('value_ours', 'std_str'),
                        ('value_paper', 'mean_str'), ('value_paper', 'std_str')])

# Drop the level 0 of the columns, and rename the two remaining columns
# (ours first, then the paper, following the order in which they were added)
foo.columns = foo.columns.droplevel(0)
foo.columns = ['NN', 'benchmark']

# Reset the index so variable, condition and horizon become regular columns
foo = foo.reset_index()

# Sort the values of the column condition so that the order is 
# low, moderate, elevated, high
foo['condition'] = pd.Categorical(foo['condition'], categories=['low', 'moderate', 'elevated', 'high'], ordered=True)
foo = foo.sort_values(by=['variable', 'condition'])


# Print as a Latex table, one table for each variable. escape=False keeps the
# \textbf{...} markup intact in the output.
# NOTE(review): the printed frame has 4 columns (condition, horizon, NN,
# benchmark) while column_format declares 1 + horizon columns — confirm.
for variable in data_columns_fcst:
    print(foo[foo['variable'] == variable].drop(columns='variable').to_latex(
        index=False, 
        escape=False,
        column_format='|l|' + '|c|' * horizon,
        caption=f'Comparison of the results of the paper with the results of our model for the variable {variable}',
        label=f'tab:comparison_{variable}'))



### Visualize predictions

In [None]:
# hide
# Visualize the df_results statistics for F10 (helper presumably provided by
# the swdf star-imports — confirm).
plot_solar_algorithm_performance(df_results, 'F10')

In [None]:
# hide
# Visualize the df_results statistics for S10 (helper presumably provided by
# the swdf star-imports — confirm).
plot_solar_algorithm_performance(df_results, 'S10')

In [None]:
# hide
# Visualize the df_results statistics for M10 (helper presumably provided by
# the swdf star-imports — confirm).
plot_solar_algorithm_performance(df_results, 'M10')

In [None]:
# hide
# Visualize the df_results statistics for Y10 (helper presumably provided by
# the swdf star-imports — confirm).
plot_solar_algorithm_performance(df_results, 'Y10')