In [3]:
import sys
sys.path.append('..')
from swdf.benchmark import *
from tsai.basics import *
import sklearn
from swdf.utils import *
import wandb
wandb_api = wandb.Api()

In [4]:
# Config
config = yaml2dict('config/solfsmy_comparison.yaml')
config = config.eval

config_base = yaml2dict('config/base.yaml')

config, config_base

({'solfsmy_data_path': '../data/SOLFSMY.TXT'},
 {'arch_name': 'PatchTST',
  'arch_config_file': 'config/patchtst.yaml',
  'bs': 1024,
  'horizon': 6,
  'init_weights': False,
  'lookback': 36,
  'lr_max': None,
  'n_epoch': 30,
  'partial_n': 0.1,
  'seed': 42,
  'test_start_datetime': '2012-10-01 12:00:00',
  'test_end_datetime': '2018-12-31 12:00:00',
  'wandb': {'enabled': False,
   'log_learner': True,
   'mode': 'offline',
   'group': None,
   'project': 'swdf'}})

In [5]:
learn = load_learner_all("tmp", model_fname = 'model', 
                         verbose=True, 
                         device=default_device())

y_persistence_preds, y_persistence = torch.load('tmp/y_persistence_preds.pt'), torch.load('tmp/y_persistence.pt')


Learner loaded:
path          = 'tmp'
dls_fname     = '['dls_0.pth', 'dls_1.pth', 'dls_2.pth', 'dls_3.pth']'
model_fname   = 'model.pth'
learner_fname = 'learner.pkl'


In [6]:
y_test_preds, y_test = learn.get_preds(ds_idx = 2, with_targs=True)

y_test_preds = to_np(y_test_preds)
y_test = to_np(y_test)
y_persistence_preds = to_np(y_persistence_preds)
y_persistence = to_np(y_persistence)

print(f"y_test_preds.shape: {y_test_preds.shape}")  
print(f"y_persistence_preds.shape: {y_test.shape}")  

y_test_preds.shape: (136, 4, 6)
y_persistence_preds.shape: (136, 4, 6)


In [7]:
horizon = y_test.shape[-1]
data_columns_fcst = ['F10', 'S10', 'M10', 'Y10']

In [8]:
y_test_dict, y_test_preds_dict, y_persistence_dict, y_persistence_preds_dict = {}, {}, {}, {}
for i, var in enumerate(data_columns_fcst):
    y_test_dict[var] = y_test[:,i]
    y_test_preds_dict[var] = y_test_preds[:,i]
    y_persistence_dict[var] = y_persistence[:,i]
    y_persistence_preds_dict[var] = y_persistence_preds[:,i]

In [9]:
def forecast_rmse(y_true, y_pred):
    """
    Calculate the Root Mean Squared Error (RMSE) between true and predicted values.

    Args:
    y_true (torch.Tensor): True values.
    y_pred (torch.Tensor): Predicted values.

    Returns:
    torch.Tensor: RMSE value.
    """
    return np.sqrt(np.mean((y_pred - y_true) ** 2))


def forecast_mape(y_true, y_pred):
    """
    Calculate the Mean Absolute Percentage Error (MAPE) between true and predicted values.

    Args:
    y_true (torch.Tensor): True values.
    y_pred (torch.Tensor): Predicted values.

    Returns:
    torch.Tensor: MAPE value.
    """
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def forecast_correlation(y_true, y_pred):
    """
    Calculate the Pearson Correlation Coefficient (R) between true and predicted values.

    Args:
    y_true (torch.Tensor): True values.
    y_pred (torch.Tensor): Predicted values.

    Returns:
    torch.Tensor: Pearson Correlation Coefficient.
    """
    mean_y_true = np.mean(y_true)
    mean_y_pred = np.mean(y_pred)
    
    cov = np.mean((y_true - mean_y_true) * (y_pred - mean_y_pred))
    std_y_true = np.std(y_true)
    std_y_pred = np.std(y_pred)
    
    return cov / (std_y_true * std_y_pred)


In [10]:
columns=['Driver', 'Relative metric', 'Value']
rmse, mape, correlation, fe_persistence = None, None, None, None
df_results = pd.DataFrame(columns=columns)

for var in data_columns_fcst:    
    rmse = forecast_rmse(y_test_dict[var], 
                            y_test_preds_dict[var])
    mape = forecast_mape(y_test_dict[var],
                            y_test_preds_dict[var])
    correlation = forecast_correlation(y_test_dict[var],
                                     y_test_preds_dict[var])


    df_results = pd.concat([
        df_results,
        pd.DataFrame([
                [var, 'RMSE', rmse],
                [var, 'MAPE', mape],
                [var, 'R', correlation],
            ], columns=columns)
    ])
    
df_results = df_results.reset_index(drop=True)
df_results

Unnamed: 0,Driver,Relative metric,Value
0,F10,RMSE,12.709373
1,F10,MAPE,6.04407
2,F10,R,0.949281
3,S10,RMSE,6.440771
4,S10,MAPE,3.642698
5,S10,R,0.988334
6,M10,RMSE,9.431362
7,M10,MAPE,5.569709
8,M10,R,0.969419
9,Y10,RMSE,7.061479


In [11]:
def generate_df_results(y_true, y_preds):
    columns=['Driver', 'Relative metric', 'Value']
    data_columns_fcst = ['F10', 'S10', 'M10', 'Y10']
    df_results = pd.DataFrame(columns=columns)

    for var in data_columns_fcst:    
        rmse = forecast_rmse(y_true[var], 
                                y_preds[var])
        mape = forecast_mape(y_true[var],
                                y_preds[var])
        correlation = forecast_correlation(y_true[var],
                                        y_preds[var])


        df_results = pd.concat([
            df_results,
            pd.DataFrame([
                    [var, 'RMSE', rmse],
                    [var, 'MAPE', mape],
                    [var, 'R', correlation],
                ], columns=columns)
        ])
    return df_results.reset_index(drop=True)

In [12]:
df_persistence = generate_df_results(y_persistence_dict, y_persistence_preds_dict).reset_index(drop=True)

df_results ['Value Scaled'] = df_results['Value'] / df_persistence['Value']
df_results.reset_index(inplace=True)
df_persistence.reset_index(inplace=True)

df_results['Persistence'] = df_persistence['Value']
df_results

Unnamed: 0,index,Driver,Relative metric,Value,Value Scaled,Persistence
0,0,F10,RMSE,12.709373,0.915182,13.887262
1,1,F10,MAPE,6.04407,0.911048,6.634198
2,2,F10,R,0.949281,1.00923,0.940599
3,3,S10,RMSE,6.440771,0.848606,7.589829
4,4,S10,MAPE,3.642698,0.896809,4.061846
5,5,S10,R,0.988334,1.004705,0.983706
6,6,M10,RMSE,9.431362,0.902764,10.447209
7,7,M10,MAPE,5.569709,0.906718,6.142711
8,8,M10,R,0.969419,1.007168,0.96252
9,9,Y10,RMSE,7.061479,0.960235,7.35391


In [13]:
df_results_paper = pd.read_csv('../data/NNEComparison_results.csv', dtype={'Driver': str, 'Relative metric': str, 'Value': float, 'Scalar Mean': float, 'Scalar STD': float})

df_results_paper['Value_PatchTST'] = df_results['Value Scaled']

df_persistence.reset_index(inplace=True)
df_results_paper['Persistence'] = df_persistence['Value']

In [14]:
def highlight_max(row):
    # Empty color mask
    colors = [''] * len(row)
    if row['Relative metric'] == 'R':
        max_value = max(pd.to_numeric(row, errors="coerce").dropna())
    else:
        max_value = min(pd.to_numeric(row, errors="coerce").dropna())  # Get the maximum value in the row
    for i, value in enumerate(row):
        if value == max_value:
            colors[i] = 'background-color: grey'  # Highlight with yellow
    return colors

# Compare and highlight # Get max of each element in both dataframes
df_numeric = df_results_paper.select_dtypes(include=[np.number])
styled_result = df_results_paper.style.apply(highlight_max, axis=1)

styled_result

Unnamed: 0,Driver,Relative metric,SET,Transfer learning,UV-MLE,MV-MLE,MV-MLE (PCA),Value_PatchTST,Persistence
0,F10,RMSE,0.927,0.799,0.911,0.75,0.773,0.915182,13.887262
1,F10,MAPE,0.939,0.823,0.904,0.771,0.805,0.911048,6.634198
2,F10,R,1.005,1.024,1.013,1.029,1.028,1.00923,0.940599
3,S10,RMSE,0.854,0.735,0.738,0.731,0.703,0.848606,7.589829
4,S10,MAPE,0.835,0.758,0.755,0.803,0.736,0.896809,4.061846
5,S10,R,1.005,1.008,1.008,1.01,1.009,1.004705,0.983706
6,M10,RMSE,0.761,0.646,0.751,0.623,0.596,0.902764,10.447209
7,M10,MAPE,0.771,0.687,0.764,0.658,0.651,0.906718,6.142711
8,M10,R,1.019,1.026,1.021,1.029,1.029,1.007168,0.96252
9,Y10,RMSE,0.971,0.836,0.999,0.834,0.832,0.960235,7.35391


In [15]:
# Compare these results with the results in the paper. The results in the paper
# can be found in the data folder as a csv
df_results_paper_Licata = pd.read_csv('../data/paper_results.csv')

# Filter out the rows with Statistics = 'EBM'
df_results_paper_Licata = df_results_paper_Licata[df_results_paper_Licata['statistic'] != 'EBM']

df_results_paper_Licata["Values"] = df_results_paper_Licata[["1 Days", "2 Days", "3 Days", "4 Days", "5 Days", "6 Days"]].mean(axis=1)
values = df_results_paper_Licata.groupby(['variable','statistic']).agg({
    'Values': 'mean',
})
values

Unnamed: 0_level_0,Unnamed: 1_level_0,Values
variable,statistic,Unnamed: 2_level_1
F10,mean,7.610175
F10,std,19.379083
M10,mean,3.560033
M10,std,13.182642
S10,mean,6.200683
S10,std,20.197242
Y10,mean,1.396167
Y10,std,9.715729
