In [8]:
import sys
sys.path.append('..')
from swdf.benchmark import *
from tsai.basics import *
import sklearn
from swdf.utils import *
import wandb
wandb_api = wandb.Api()

In [9]:
# Config
config = yaml2dict('config/solfsmy.yaml')
config = config.eval

config_base = yaml2dict('config/base.yaml')

config, config_base

({'dst_data_path': '../data/DST_IAGA2002.txt',
  'learner_artifact': None,
  'round_preds': True,
  'solfsmy_data_path': '../data/SOLFSMY.TXT'},
 {'arch_name': 'PatchTST',
  'arch_config_file': 'config/patchtst.yaml',
  'bs': 1024,
  'horizon': 6,
  'init_weights': False,
  'lookback': 36,
  'lr_max': None,
  'n_epoch': 5,
  'partial_n': 0.1,
  'seed': 42,
  'test_start_datetime': '2012-10-01 12:00:00',
  'test_end_datetime': '2018-12-31 12:00:00',
  'wandb': {'enabled': False,
   'log_learner': True,
   'mode': 'offline',
   'group': None,
   'project': 'swdf'}})

In [10]:
learn = load_learner_all("tmp", model_fname = 'model', 
                         verbose=True, 
                         device=default_device())

Learner loaded:
path          = 'tmp'
dls_fname     = '['dls_0.pth', 'dls_1.pth', 'dls_2.pth', 'dls_3.pth', 'dls_4.pth', 'dls_5.pth']'
model_fname   = 'model.pth'
learner_fname = 'learner.pkl'


In [11]:
y_test_preds, y_test = learn.get_preds(ds_idx = 2, with_targs=True)
y_persistence_preds, y_persistence = torch.load('tmp/y_persistence_preds.pt'), torch.load('tmp/y_persistence.pt')

y_test_preds = to_np(y_test_preds)
y_test = to_np(y_test)
y_persistence_preds = to_np(y_persistence_preds)
y_persistence = to_np(y_persistence)

print(f"y_test_preds.shape: {y_test_preds.shape}")  
print(f"y_persistence_preds.shape: {y_test.shape}")  

y_test_preds.shape: (408, 4, 3)
y_persistence_preds.shape: (408, 4, 3)


In [12]:
horizon = y_test.shape[-1]
data_columns_fcst = ['F10', 'S10', 'M10', 'Y10']

In [13]:
y_test_split, y_test_preds_split, y_persistence_split, y_persistence_preds_split = {}, {}, {}, {}
for i, var in enumerate(data_columns_fcst):
    y_test_split[var] = y_test[:,i,:]
    y_test_preds_split[var] = y_test_preds[:,i,:]
    y_persistence_split[var] = y_persistence[:,i,:]
    y_persistence_preds_split[var] = y_persistence_preds[:,i,:]


# Check the shape of each variable
for var in data_columns_fcst:
    print(f"{var}: {[y_test_split[var].shape]}, {[y_test_preds_split[var].shape]}, {[y_persistence_split[var].shape]}, {[y_persistence_preds_split[var].shape]}")

F10: [(408, 3)], [(408, 3)], [(408, 3)], [(408, 3)]
S10: [(408, 3)], [(408, 3)], [(408, 3)], [(408, 3)]
M10: [(408, 3)], [(408, 3)], [(408, 3)], [(408, 3)]
Y10: [(408, 3)], [(408, 3)], [(408, 3)], [(408, 3)]


In [14]:
def forecast_correlation(y_true, y_pred, h):
    # Pearson correlation between the forecasted value and the actual value
    # Inputs:
    # y_true: actual values (samples x steps)
    # y_pred: predicted values (samples x steps)
    # h: horizon of the forecast (1..horizon)
    return np.corrcoef(y_pred[:, h-1], y_true[:, h-1])[0,1]

def forecast_rmse(y_true, y_pred, h):
    # RMSE between the forecasted value and the actual value
    # Inputs:
    # y_true: actual values (samples x steps)
    # y_pred: predicted values (samples x steps)
    # h: horizon of the forecast (1..horizon)
    return np.sqrt(np.mean((y_pred[:, h-1] - y_true[:, h-1])**2))

def forecast_mape(y_true, y_pred, h):
    # MAPE between the forecasted value and the actual value
    # Inputs:
    # y_true: actual values (samples x steps)
    # y_pred: predicted values (samples x steps)
    # h: horizon of the forecast (1..horizon)
    return np.mean(np.abs((y_pred[:, h-1] - y_true[:, h-1]) / y_true[:, h-1])) * 100

In [15]:
columns=['Driver', 'Relative metric', 'Value', 'Scalar Mean', 'Scalar STD']
rmse, mape, correlation, fe_persistence = None, None, None, None
df_results = pd.DataFrame(columns=columns)

for var in data_columns_fcst:
    rmse, mape, correlation, fe_persistence = [], [], [], []
    for h in range(1, horizon+1):
        rmse.append(forecast_rmse(y_test_split[var], 
                            y_test_preds_split[var], h))
        mape.append(forecast_mape(y_test_split[var],
                            y_test_preds_split[var], h))
        correlation.append(forecast_correlation(y_test_split[var],
                                     y_test_preds_split[var], h))
        fe_persistence.append(forecast_error(y_persistence_split[var],
                                   y_persistence_preds_split[var], h))

    df_results = pd.concat([
        df_results,
        pd.DataFrame([
                [var, 'RMSE', np.mean(rmse), np.mean(fe_persistence), np.std(fe_persistence)],
                [var, 'MAPE', np.mean(mape), np.mean(fe_persistence), np.std(fe_persistence)],
                [var, 'R', np.mean(correlation), np.mean(fe_persistence), np.std(fe_persistence)],
            ], columns=columns)
    ])
    
df_results

Unnamed: 0,Driver,Relative metric,Value,Scalar Mean,Scalar STD
0,F10,RMSE,9.622891,-0.006127,10.004161
1,F10,MAPE,4.460686,-0.006127,10.004161
2,F10,R,0.970795,-0.006127,10.004161
0,S10,RMSE,4.111161,-0.022242,4.564721
1,S10,MAPE,2.486035,-0.022242,4.564721
2,S10,R,0.995023,-0.022242,4.564721
0,M10,RMSE,6.093913,0.048938,6.614127
1,M10,MAPE,3.849775,0.048938,6.614127
2,M10,R,0.986016,0.048938,6.614127
0,Y10,RMSE,5.05781,-0.267075,4.778


In [16]:
def generate_df_results(y_true, y_preds):
    columns=['variable', 'statistic', 'result']
    rmse, mape, correlation = None, None, None
    df_results = pd.DataFrame(columns=columns)

    for var in data_columns_fcst:
        rmse, mape, correlation = [], [], []
        for h in range(1, horizon+1):
            rmse.append(forecast_rmse(y_true[var], 
                                y_preds[var], h))
            mape.append(forecast_mape(y_true[var],
                                y_preds[var], h))
            correlation.append(forecast_correlation(y_true[var],
                                            y_preds[var], h))
        df_results = pd.concat([
            df_results,
            pd.DataFrame([
                    [var, 'RMSE', np.mean(rmse)],
                    [var, 'MAPE', np.mean(mape)],
                    [var, 'R', np.mean(correlation)]
                ], columns=columns),
        ])

    return df_results

In [17]:
df_persistence = generate_df_results(y_persistence_split, y_persistence_preds_split)

df_results ['Value Scaled'] = df_results['Value'] / df_persistence['result']
df_results.reset_index(inplace=True)
df_persistence.reset_index(inplace=True)

df_results['Persistence'] = df_persistence['result']
df_results

Unnamed: 0,index,Driver,Relative metric,Value,Scalar Mean,Scalar STD,Value Scaled,Persistence
0,0,F10,RMSE,9.622891,-0.006127,10.004161,0.980755,9.811718
1,1,F10,MAPE,4.460686,-0.006127,10.004161,1.012555,4.405377
2,2,F10,R,0.970795,-0.006127,10.004161,1.001497,0.969345
3,0,S10,RMSE,4.111161,-0.022242,4.564721,0.942854,4.360337
4,1,S10,MAPE,2.486035,-0.022242,4.564721,0.969922,2.563129
5,2,S10,R,0.995023,-0.022242,4.564721,1.000908,0.99412
6,0,M10,RMSE,6.093913,0.048938,6.614127,0.976255,6.242129
7,1,M10,MAPE,3.849775,0.048938,6.614127,0.989295,3.891433
8,2,M10,R,0.986016,0.048938,6.614127,1.000941,0.985089
9,0,Y10,RMSE,5.05781,-0.267075,4.778,1.098733,4.603312


In [19]:
df_results_paper = pd.read_csv('../data/NNEComparison_results.csv', dtype={'Driver': str, 'Relative metric': str, 'Value': float, 'Scalar Mean': float, 'Scalar STD': float})

df_results_paper['Value_PatchTST'] = df_results['Value']

df_persistence.reset_index(inplace=True)
df_results_paper['Persistence'] = df_persistence['result']

In [20]:
def highlight_max(row):
    # Empty color mask
    colors = [''] * len(row)
    if row['Relative metric'] == 'R':
        max_value = max(pd.to_numeric(row, errors="coerce").dropna())
    else:
        max_value = min(pd.to_numeric(row, errors="coerce").dropna())  # Get the maximum value in the row
    for i, value in enumerate(row):
        if value == max_value:
            colors[i] = 'background-color: grey'  # Highlight with yellow
    return colors

# Compare and highlight # Get max of each element in both dataframes
df_numeric = df_results_paper.select_dtypes(include=[np.number])
styled_result = df_results_paper.style.apply(highlight_max, axis=1)

styled_result

Unnamed: 0,Driver,Relative metric,SET,Transfer learning,UV-MLE,MV-MLE,MV-MLE (PCA),Value_PatchTST,Persistence
0,F10,RMSE,0.927,0.799,0.911,0.75,0.773,9.622891,9.811718
1,F10,MAPE,0.939,0.823,0.904,0.771,0.805,4.460686,4.405377
2,F10,R,1.005,1.024,1.013,1.029,1.028,0.970795,0.969345
3,S10,RMSE,0.854,0.735,0.738,0.731,0.703,4.111161,4.360337
4,S10,MAPE,0.835,0.758,0.755,0.803,0.736,2.486035,2.563129
5,S10,R,1.005,1.008,1.008,1.01,1.009,0.995023,0.99412
6,M10,RMSE,0.761,0.646,0.751,0.623,0.596,6.093913,6.242129
7,M10,MAPE,0.771,0.687,0.764,0.658,0.651,3.849775,3.891433
8,M10,R,1.019,1.026,1.021,1.029,1.029,0.986016,0.985089
9,Y10,RMSE,0.971,0.836,0.999,0.834,0.832,5.05781,4.603312


In [None]:
# Compare these results with the results in the paper. The results in the paper
# can be found in the data folder as a csv
df_results_paper_Licata = pd.read_csv('../data/paper_results.csv')

# Filter out the rows with Statistics = 'EBM'
df_results_paper_Licata = df_results_paper_Licata[df_results_paper_Licata['statistic'] != 'EBM']

df_results_paper_Licata["Values"] = df_results_paper_Licata[["1 Days", "2 Days", "3 Days", "4 Days", "5 Days", "6 Days"]].mean(axis=1)
values = df_results_paper_Licata.groupby(['variable','statistic']).agg({
    'Values': 'mean',
})
values