Importing libraries

In [1]:
import pandas as pd
import os
import time
import warnings
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler, LabelEncoder

warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('CleanedDataset.csv')
df.head()

Unnamed: 0,date,hr_reading,cal_bur_reading,step_reading,stress_score,stress_grade,spo_saturation,systolic,diastolic,calories,...,is_work_time,is_rest_time,sleep_stress_mean,work_stress_mean,rest_stress_mean,time_since_high_stress_min,spo2_rolling_mean_30min,spo2_variability_30min,spo2_sleep_mean,bgl_reading
0,2025-02-13 05:15:00+00:00,59.25,0.0,0.0,33.0,2.0,95.5,157.0,82.0,0.0,...,0,0,42.549451,0.0,0.0,0.0,96.25,1.06066,96.25,86.0
1,2025-02-13 05:30:00+00:00,55.5,0.0,0.0,45.432234,2.034799,99.0,157.0,82.0,0.0,...,0,0,42.549451,0.0,0.0,0.0,97.25,2.474874,97.25,87.0
2,2025-02-13 05:45:00+00:00,63.5,0.0,0.0,40.0,2.0,97.0,157.0,82.0,0.0,...,0,0,40.216117,0.0,0.0,0.0,98.0,1.414214,98.0,91.0
3,2025-02-13 06:00:00+00:00,61.0,0.0,0.0,45.432234,2.034799,98.0,157.0,82.0,0.0,...,0,1,0.0,0.0,0.0,0.0,97.5,0.707107,97.0,92.0
4,2025-02-13 06:15:00+00:00,60.666667,0.0,0.0,37.0,2.0,96.0,157.0,82.0,0.0,...,0,1,0.0,0.0,0.0,0.0,97.0,1.414214,0.0,88.0


In [3]:
# Ensure timestamp is in datetime format
df['date'] = pd.to_datetime(df['date'])

# Sort by timestamp just in case
df = df.sort_values('date')
df = df.rename(columns={'date': 'timestamp'})

In [4]:
low_variation_cols = []

for col in df.columns:
    if df[col].nunique() > 1:
        top_freq_ratio = df[col].value_counts(normalize=True).iloc[0]
        if top_freq_ratio > 0.95:
            low_variation_cols.append(col)

print(f"Columns with low variation (<5% unique values): ({len(low_variation_cols)} total)")
for col in low_variation_cols:
    print(f"- {col}")

# Find constant columns (only 1 unique value)
constant_features = df.columns[df.nunique() == 1].tolist()

print(f"Constant Features (no variation): ({len(constant_features)} total)")

df_cleaned = df.drop(columns=constant_features)

print(f"Dropped {len(constant_features)} constant features.")
print(f"Remaining shape: {df_cleaned.shape} (rows, columns)")

# Calculate % of missing values per column
missing_percent = df_cleaned.isna().mean() * 100

# Filter only columns with missing values
missing_percent = missing_percent[missing_percent > 0].sort_values(ascending=False)

# Display results
print(f"Columns with missing values ({len(missing_percent)} total):")
for col, pct in missing_percent.items():
    print(f"- {col}: {pct:.2f}% missing")

Columns with low variation (<5% unique values): (8 total)
- calories
- carbs
- fat
- protein
- fiber (g)
- GI
- hr_intensity_zone
- step_intensity_zone
Constant Features (no variation): (0 total)
Dropped 0 constant features.
Remaining shape: (2709, 66) (rows, columns)
Columns with missing values (0 total):


In [5]:
# Drop rows where bgl_reading is missing, but keep other columns
df_cleaned= df.dropna(axis=1)

# Show new shape
print(f"Dropped columns with missing values.")
print(f"Remaining DataFrame shape: {df_cleaned.shape} (rows, columns)")

Dropped columns with missing values.
Remaining DataFrame shape: (2709, 66) (rows, columns)


In [6]:
# Step 1: Rename columns for NeuralProphet
df_np = df_cleaned.rename(columns={
    'timestamp': 'ds',
    'bgl_reading': 'y'
})

# Step 2: Calculate time difference in minutes (actual time gap between rows)
df_np['time_diff'] = df_np['ds'].diff().dt.total_seconds() / 60  # convert to minutes
df_np['time_diff'].fillna(15, inplace=True)  # Fill first row with default 15 min

# Step 3: Define future regressors (all columns except 'ds' and 'y')
future_regressors = [col for col in df_np.columns if col not in ['ds', 'y']]
print("Future Regressors:")
print(future_regressors)

non_numeric = df_np[[col for col in df_np.columns if col not in ['ds', 'y']]].select_dtypes(include=['object'])

non_numeric.columns = non_numeric.columns.tolist()
print("Non-numeric regressors:")
print(non_numeric.columns)

from sklearn.preprocessing import LabelEncoder

# Initialize the encoder
label_encoder = LabelEncoder()

# Apply encoding to each column
for col in non_numeric.columns:
    df_np[col] = label_encoder.fit_transform(df_np[col].astype(str))

# Extract useful time components (optional)
df_np['weekday'] = df_np['ds'].dt.dayofweek  # 0=Monday
df_np['month'] = df_np['ds'].dt.month
df_np['day'] = df_np['ds'].dt.day
df_np['hour'] = df_np['ds'].dt.hour
df_np['minute'] = df_np['ds'].dt.minute
df_np['week'] = df_np['ds'].dt.isocalendar().week

# Derived features
df_np['is_weekend'] = df_np['weekday'].apply(lambda x: 1 if x >= 5 else 0)

# Periodic encodings
df_np['hour_sin'] = np.sin(2 * np.pi * df_np['hour'] / 24)
df_np['hour_cos'] = np.cos(2 * np.pi * df_np['hour'] / 24)
df_np['minute_sin'] = np.sin(2 * np.pi * df_np['minute'] / 60)
df_np['minute_cos'] = np.cos(2 * np.pi * df_np['minute'] / 60)

# Drop original date column
if 'date' in df_np.columns:
    df_np.drop(columns=['date'], inplace=True)

Future Regressors:
['hr_reading', 'cal_bur_reading', 'step_reading', 'stress_score', 'stress_grade', 'spo_saturation', 'systolic', 'diastolic', 'calories', 'carbs', 'fat', 'protein', 'fiber (g)', 'GI', 'sleep_frag_value', 'duration', 'mean_sleep_score', 'max_sleep_score', 'sleep_score_variability', 'sleep_score_change', 'transition_density', 'sleep_stability_index', 'sleep_stage_volatility', 'cumulative_sleep_score', 'weighted_sleep_depth', 'transition_density_function', 'hr_30min_avg', 'hrv', 'hr_ema', 'hr_zscore', 'hr_baseline_diff', 'hr_trend_slope', 'resting_hr', 'hr_intensity_zone', 'hr_recovery', 'hr_response_delay', 'cal_bur_30min_sum', 'cumulative_cal_burn', 'cal_bur_change_rate', 'cal_bur_activity_intensity', 'cal_bur_time_since_high_intensity', 'steps_30min_sum', 'steps_1hr_sum', 'step_std_30min', 'steps_vs_distance_walked', 'step_rate', 'time_since_last_steps_in_sec', 'steps_active_bout_duration', 'step_intensity_zone', 'stress_3readings_mean', 'stress_6readings_mean', 'stre

Splitting Dataset

In [7]:
df = df_np.copy()
df['ds'] = pd.to_datetime(df['ds'])
sorted_days = sorted(df['ds'].dt.date.unique())
n_days = len(sorted_days)
n_train = int(n_days * 0.6)
n_val = int(n_days * 0.2)
train_days = sorted_days[:n_train]
val_days = sorted_days[n_train:n_train + n_val]
test_days = sorted_days[n_train + n_val:]
df_train = df[df['ds'].dt.date.isin(train_days)].sort_values(by='ds').reset_index(drop=True)
df_val = df[df['ds'].dt.date.isin(val_days)].sort_values(by='ds').reset_index(drop=True)
df_test = df[df['ds'].dt.date.isin(test_days)].sort_values(by='ds').reset_index(drop=True)

future_regressors = [col for col in df_train.columns if col not in ['ds', 'y']]
scaler = StandardScaler()
df_train[future_regressors] = scaler.fit_transform(df_train[future_regressors])
df_val[future_regressors] = scaler.transform(df_val[future_regressors])
df_test[future_regressors] = scaler.transform(df_test[future_regressors])

In [8]:
print(f" Train days: {len(train_days)} | Rows: {len(df_train)} | Date Range: {df_train['ds'].min().date()} → {df_train['ds'].max().date()}")
print(f" Validation days: {len(val_days)} | Rows: {len(df_val)} | Date Range: {df_val['ds'].min().date()} → {df_val['ds'].max().date()}")
print(f" Test days: {len(test_days)} | Rows: {len(df_test)} | Date Range: {df_test['ds'].min().date()} → {df_test['ds'].max().date()}")

 Train days: 17 | Rows: 1611 | Date Range: 2025-02-13 → 2025-03-01
 Validation days: 5 | Rows: 480 | Date Range: 2025-03-02 → 2025-03-06
 Test days: 7 | Rows: 618 | Date Range: 2025-03-07 → 2025-03-13


In [9]:
# df_val.to_csv('orginal_val.csv')

In [10]:
print(df_train.columns.to_list())

['ds', 'hr_reading', 'cal_bur_reading', 'step_reading', 'stress_score', 'stress_grade', 'spo_saturation', 'systolic', 'diastolic', 'calories', 'carbs', 'fat', 'protein', 'fiber (g)', 'GI', 'sleep_frag_value', 'duration', 'mean_sleep_score', 'max_sleep_score', 'sleep_score_variability', 'sleep_score_change', 'transition_density', 'sleep_stability_index', 'sleep_stage_volatility', 'cumulative_sleep_score', 'weighted_sleep_depth', 'transition_density_function', 'hr_30min_avg', 'hrv', 'hr_ema', 'hr_zscore', 'hr_baseline_diff', 'hr_trend_slope', 'resting_hr', 'hr_intensity_zone', 'hr_recovery', 'hr_response_delay', 'cal_bur_30min_sum', 'cumulative_cal_burn', 'cal_bur_change_rate', 'cal_bur_activity_intensity', 'cal_bur_time_since_high_intensity', 'steps_30min_sum', 'steps_1hr_sum', 'step_std_30min', 'steps_vs_distance_walked', 'step_rate', 'time_since_last_steps_in_sec', 'steps_active_bout_duration', 'step_intensity_zone', 'stress_3readings_mean', 'stress_6readings_mean', 'stress_var_3readi

Model initialization

In [11]:
class Args:
    # General
    model = 'FEDformer'
    version = 'Fourier'
    mode_select = 'random'
    modes = 64
    L = 3
    base = 'legendre'
    cross_activation = 'tanh'

    # Data
    data = 'CleanedDataset'  # or your custom dataset
    root_path = './'  # change this if needed
    data_path = 'CleanedDataset.csv'  # update if using a different file
    features = 'MS'  # multivariate to single
    target = 'y'  # output target column
    freq = 't'
    detail_freq = '15min'
    checkpoints = './checkpoints/'

    # Forecasting task
    seq_len = 12       # input sequence length (past)
    label_len = 6      # portion of decoder input (can be 50% of seq_len)
    pred_len = 4       # how many future steps to predict

    # Model dimensions
    enc_in = None    # number of input features (match your dataset)
    dec_in = None
    c_out = 1          # output is univariate (1 feature)
    d_model = 512
    n_heads = 8
    e_layers = 2
    d_layers = 1
    d_ff = 2048
    moving_avg = [12]
    factor = 1
    distil = True
    dropout = 0.05
    embed = 'timeF'
    activation = 'gelu'
    output_attention = False
    do_predict = True

    # Optimization
    num_workers = 2
    itr = 1
    train_epochs = 50   # use more for real training
    batch_size = 16
    patience = 2
    learning_rate = 0.0001
    des = 'fedformer_ms_forecast'
    loss = 'mse'
    lradj = 'type1'
    use_amp = False

    # GPU
    use_gpu = torch.cuda.is_available()
    gpu = 0
    use_multi_gpu = False
    devices = '0'
    device_ids = [0]

args = Args()
total_input_features = len(future_regressors) + 1
args.enc_in = total_input_features
args.dec_in = total_input_features
args.c_out = 1  # ensure we only predict y

# Optional device setup logic (multi-GPU safety)
if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]


In [12]:
# --- Configuration Verification Block ---
print("\n--- Verifying Configuration ---")
print("features:", args.features)
print("target:", args.target)
print("c_out:", args.c_out)

assert args.features == 'MS', "args.features should be 'MS' for multivariate-to-single prediction"
assert args.target == 'y', "args.target should be set to the name of the target column, e.g., 'y'"
assert args.c_out == 1, "args.c_out should be 1 to predict a single target variable"

print("All configuration checks passed!\n")


--- Verifying Configuration ---
features: MS
target: y
c_out: 1
All configuration checks passed!



In [16]:
from torch.utils.data import Dataset, DataLoader
class MyTSDataset(Dataset):
    def __init__(self, df, seq_len=12, label_len=6, pred_len=4, target='y', features='MS', scale=True):
        self.seq_len = seq_len
        self.label_len = label_len
        self.pred_len = pred_len
        self.target = target
        self.features = features

        self.scaler = StandardScaler()
        df = df.copy()

        # Reorder columns: date, features..., target
        cols = list(df.columns)
        cols.remove('ds')
        cols.remove(target)
        feature_cols = cols
        df = df[['ds'] + feature_cols + [target]]

        # Use precomputed time features instead of recomputing
        self.data_stamp = df[['weekday', 'month', 'day', 'hour']].values

        if features in ['M', 'MS']:
            self.encoder_data = df[feature_cols + [target]].values
            self.decoder_data = df[[target]].values  # decoder gets only 'y'
        else:
            self.encoder_data = df[[target]].values
            self.decoder_data = df[[target]].values


    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.encoder_data[s_begin:s_end]
        seq_y = self.decoder_data[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        return len(self.encoder_data) - self.seq_len - self.pred_len + 1

In [17]:
# 2. DataLoader Setup
train_dataset = MyTSDataset(df_train, args.seq_len, args.label_len, args.pred_len, args.target, args.features)
val_dataset = MyTSDataset(df_val, args.seq_len, args.label_len, args.pred_len, args.target, args.features)
test_dataset = MyTSDataset(df_test, args.seq_len, args.label_len, args.pred_len, args.target, args.features)

In [18]:
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, drop_last=True)

In [19]:
print(f"Train dataset length: {len(train_dataset)}")
print(f"Validation dataset length: {len(val_dataset)}")
print(f"Test dataset length: {len(test_dataset)}")

Train dataset length: 1596
Validation dataset length: 465
Test dataset length: 603


In [20]:
# Peek one batch to inspect the columns/features
sample_batch = next(iter(train_loader))
x_enc, x_dec, x_mark_enc, x_mark_dec = sample_batch

print(f"Input encoder shape: {x_enc.shape}")  # [batch_size, seq_len, num_features]
print(f"Input decoder shape: {x_dec.shape}")  # [batch_size, label_len + pred_len, num_features]

# Corresponding feature names
input_feature_names = ['y'] + future_regressors
print(f"Input feature columns used: {input_feature_names}")

Input encoder shape: torch.Size([16, 12, 76])
Input decoder shape: torch.Size([16, 10, 1])
Input feature columns used: ['y', 'hr_reading', 'cal_bur_reading', 'step_reading', 'stress_score', 'stress_grade', 'spo_saturation', 'systolic', 'diastolic', 'calories', 'carbs', 'fat', 'protein', 'fiber (g)', 'GI', 'sleep_frag_value', 'duration', 'mean_sleep_score', 'max_sleep_score', 'sleep_score_variability', 'sleep_score_change', 'transition_density', 'sleep_stability_index', 'sleep_stage_volatility', 'cumulative_sleep_score', 'weighted_sleep_depth', 'transition_density_function', 'hr_30min_avg', 'hrv', 'hr_ema', 'hr_zscore', 'hr_baseline_diff', 'hr_trend_slope', 'resting_hr', 'hr_intensity_zone', 'hr_recovery', 'hr_response_delay', 'cal_bur_30min_sum', 'cumulative_cal_burn', 'cal_bur_change_rate', 'cal_bur_activity_intensity', 'cal_bur_time_since_high_intensity', 'steps_30min_sum', 'steps_1hr_sum', 'step_std_30min', 'steps_vs_distance_walked', 'step_rate', 'time_since_last_steps_in_sec', 'st

In [21]:
print(f"x_enc shape: {x_enc.shape}, expected features: {args.enc_in}")

x_enc shape: torch.Size([16, 12, 76]), expected features: 76


In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
from layers.FourierCorrelation import FourierBlock, FourierCrossAttention
from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform
from layers.SelfAttention_Family import FullAttention, ProbAttention
from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp, series_decomp_multi
import math
import numpy as np

device = torch.device("cuda" if args.use_gpu else "cpu")

class Model(nn.Module):
    """
    FEDformer performs the attention mechanism on frequency domain and achieved O(N) complexity
    """
    def __init__(self, configs):
        super(Model, self).__init__()
        self.version = configs.version
        self.mode_select = configs.mode_select
        self.modes = configs.modes
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention
        self.c_out = configs.c_out

        # Decomp
        kernel_size = configs.moving_avg
        if isinstance(kernel_size, list):
            self.decomp = series_decomp_multi(kernel_size)
        else:
            self.decomp = series_decomp(kernel_size)

        # Embedding
        # The series-wise connection inherently contains the sequential information.
        # Thus, we can discard the position embedding of transformers.
        self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                                  configs.dropout)
        self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                                  configs.dropout)

        if configs.version == 'Wavelets':
            encoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base)
            decoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=configs.L, base=configs.base)
            decoder_cross_att = MultiWaveletCross(in_channels=configs.d_model,
                                                  out_channels=configs.d_model,
                                                  seq_len_q=self.seq_len // 2 + self.pred_len,
                                                  seq_len_kv=self.seq_len,
                                                  modes=configs.modes,
                                                  ich=configs.d_model,
                                                  base=configs.base,
                                                  activation=configs.cross_activation)
        else:
            encoder_self_att = FourierBlock(in_channels=configs.d_model,
                                            out_channels=configs.d_model,
                                            seq_len=self.seq_len,
                                            modes=configs.modes,
                                            mode_select_method=configs.mode_select)
            decoder_self_att = FourierBlock(in_channels=configs.d_model,
                                            out_channels=configs.d_model,
                                            seq_len=self.seq_len//2+self.pred_len,
                                            modes=configs.modes,
                                            mode_select_method=configs.mode_select)
            decoder_cross_att = FourierCrossAttention(in_channels=configs.d_model,
                                                      out_channels=configs.d_model,
                                                      seq_len_q=self.seq_len//2+self.pred_len,
                                                      seq_len_kv=self.seq_len,
                                                      modes=configs.modes,
                                                      mode_select_method=configs.mode_select)
        # Encoder
        enc_modes = int(min(configs.modes, configs.seq_len//2))
        dec_modes = int(min(configs.modes, (configs.seq_len//2+configs.pred_len)//2))
        print('enc_modes: {}, dec_modes: {}'.format(enc_modes, dec_modes))

        self.encoder = Encoder(
            [
                EncoderLayer(
                    AutoCorrelationLayer(
                        encoder_self_att,
                        configs.d_model, configs.n_heads),

                    configs.d_model,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for l in range(configs.e_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AutoCorrelationLayer(
                        decoder_self_att,
                        configs.d_model, configs.n_heads),
                    AutoCorrelationLayer(
                        decoder_cross_att,
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.c_out,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for l in range(configs.d_layers)
            ],
            norm_layer=my_Layernorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        # decomp init
        mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]]).to(device)  # cuda()
        seasonal_init, trend_init = self.decomp(x_enc)
        # decoder input
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1)
        seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len))
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
        # dec
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
                                                 trend=trend_init)
        #final
        dec_out = trend_part + seasonal_part
        dec_out = dec_out[:, -self.pred_len:, :self.c_out]  # restrict to just target

        if self.output_attention:
            return dec_out, attns
        else:
            return dec_out


In [29]:
# 3. Forward Pass Example
batch = next(iter(train_loader))
x_enc, x_dec, x_mark_enc, x_mark_dec = [x.float().to(device) for x in batch]

model = Model(args).to(device)
model.eval()

with torch.no_grad():
    output = model(x_enc, x_mark_enc, x_dec, x_mark_dec)

print("\nForward pass output shape:", output.shape)

fourier enhanced block used!
modes=64, index=[0, 1, 2, 3, 4, 5]
fourier enhanced block used!
modes=64, index=[0, 1, 2, 3, 4]
 fourier enhanced cross attention used!
modes_q=5, index_q=[0, 1, 2, 3, 4]
modes_kv=6, index_kv=[0, 1, 2, 3, 4, 5]
enc_modes: 6, dec_modes: 5

Forward pass output shape: torch.Size([16, 4, 1])


In [30]:
# 3. Training, Validation, and Test Evaluation Loop
import torch.nn as nn
import torch.optim as optim
import pandas as pd

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

In [31]:
def evaluate(loader):
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for batch in loader:
            x_enc, x_dec, x_mark_enc, x_mark_dec = [x.float().to(device) for x in batch]
            outputs = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
            preds.append(outputs.cpu())
            trues.append(x_dec[:, -args.pred_len:, :args.c_out].cpu())
    preds = torch.cat(preds, dim=0).numpy()
    trues = torch.cat(trues, dim=0).numpy()
    return preds, trues

In [32]:
for epoch in range(args.train_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        x_enc, x_dec, x_mark_enc, x_mark_dec = [x.float().to(device) for x in batch]
        optimizer.zero_grad()
        outputs = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
        loss = criterion(outputs, x_dec[:, -args.pred_len:, :args.c_out])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    val_preds, val_trues = evaluate(val_loader)
    val_loss = criterion(torch.tensor(val_preds), torch.tensor(val_trues)).item()
    print(f"Epoch [{epoch+1}/{args.train_epochs}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {val_loss:.4f}")

Epoch [1/50] | Train Loss: 4470.3183 | Val Loss: 3421.3914
Epoch [2/50] | Train Loss: 1684.8055 | Val Loss: 1801.5884
Epoch [3/50] | Train Loss: 626.7701 | Val Loss: 642.0796
Epoch [4/50] | Train Loss: 291.1423 | Val Loss: 519.0450
Epoch [5/50] | Train Loss: 208.2501 | Val Loss: 362.0822
Epoch [6/50] | Train Loss: 183.5691 | Val Loss: 328.0480
Epoch [7/50] | Train Loss: 174.5122 | Val Loss: 312.7216
Epoch [8/50] | Train Loss: 159.2595 | Val Loss: 289.2917
Epoch [9/50] | Train Loss: 156.6200 | Val Loss: 235.5773
Epoch [10/50] | Train Loss: 150.0407 | Val Loss: 254.8682
Epoch [11/50] | Train Loss: 144.3252 | Val Loss: 236.2497
Epoch [12/50] | Train Loss: 143.2649 | Val Loss: 251.3521
Epoch [13/50] | Train Loss: 139.5111 | Val Loss: 222.8016
Epoch [14/50] | Train Loss: 133.8525 | Val Loss: 220.6507
Epoch [15/50] | Train Loss: 133.2353 | Val Loss: 236.4669
Epoch [16/50] | Train Loss: 128.5672 | Val Loss: 240.8124
Epoch [17/50] | Train Loss: 123.7307 | Val Loss: 231.9959
Epoch [18/50] | Tra

In [33]:
# Evaluate on test set
preds, trues = evaluate(test_loader)
test_loss = criterion(torch.tensor(preds), torch.tensor(trues)).item()
print(f"Test Loss: {test_loss:.4f}")

Test Loss: 384.2806


In [34]:
# Reshape predictions from [B, pred_len, c_out] → [B * pred_len, c_out]
val_preds_2d = val_preds.reshape(-1, val_preds.shape[-1])
test_preds_2d = preds.reshape(-1, preds.shape[-1])

# Align timestamps accordingly
val_timestamps = df_val['ds'].iloc[args.seq_len + args.label_len - 1:].reset_index(drop=True)
val_timestamps = val_timestamps.iloc[:len(val_preds_2d)]

test_timestamps = df_test['ds'].iloc[args.seq_len + args.label_len - 1:].reset_index(drop=True)
test_timestamps = test_timestamps.iloc[:len(test_preds_2d)]

# Create DataFrames
df_val_output = pd.DataFrame(val_preds_2d, columns=[f"pred_{i}" for i in range(val_preds_2d.shape[-1])])
df_val_output['ds'] = val_timestamps

df_test_output = pd.DataFrame(test_preds_2d, columns=[f"pred_{i}" for i in range(test_preds_2d.shape[-1])])
df_test_output['ds'] = test_timestamps


In [41]:
# View first few rows of validation predictions
df_val_output.to_csv('output_val.csv')
# df_val_output.head()

In [36]:
# View first few rows of test predictions
df_test_output.head()

Unnamed: 0,pred_0,ds
0,95.57901,2025-03-07 04:15:00+00:00
1,95.953041,2025-03-07 04:30:00+00:00
2,104.331367,2025-03-07 04:45:00+00:00
3,98.199821,2025-03-07 05:00:00+00:00
4,100.179825,2025-03-07 05:15:00+00:00


In [None]:
# df_val_output.to_csv('validation.csv')

In [37]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# --- Evaluation Metrics Block ---
def smape(y_true, y_pred):
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    diff = np.abs(y_true - y_pred) / denominator
    diff[denominator == 0] = 0.0
    return np.mean(diff) * 100

def evaluate_metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    smape_val = smape(y_true, y_pred)
    return {
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'R2': r2,
        'SMAPE': smape_val
    }

# Flatten predictions and truths for evaluation
val_preds_flat = val_preds.reshape(-1, val_preds.shape[-1])[:, 0]
val_trues_flat = val_trues.reshape(-1, val_trues.shape[-1])[:, 0]
test_preds_flat = preds.reshape(-1, preds.shape[-1])[:, 0]
test_trues_flat = trues.reshape(-1, trues.shape[-1])[:, 0]

val_metrics = evaluate_metrics(val_trues_flat, val_preds_flat)
test_metrics = evaluate_metrics(test_trues_flat, test_preds_flat)

print("Validation Metrics:")
for k, v in val_metrics.items():
    print(f"{k}: {v:.4f}")

print("\nTest Metrics:")
for k, v in test_metrics.items():
    print(f"{k}: {v:.4f}")

Validation Metrics:
MSE: 362.4617
RMSE: 19.0384
MAE: 15.4654
R2: -0.3753
SMAPE: 14.9081

Test Metrics:
MSE: 384.2807
RMSE: 19.6031
MAE: 13.8953
R2: -0.7367
SMAPE: 12.7938


In [47]:
# Generate output_val and output_test DataFrames with aligned timestamps

# Reshape predictions and truths
val_preds_flat = val_preds.reshape(-1)
val_trues_flat = val_trues.reshape(-1)
test_preds_flat = preds.reshape(-1)
test_trues_flat = trues.reshape(-1)

# Construct aligned timestamps from prediction windows
val_timestamps = []
val_limit = len(df_val) - args.label_len - args.seq_len - 1
for i in range(min(len(val_preds), val_limit)):
    base_idx = i + args.seq_len + args.label_len - 1
    if base_idx < len(df_val):
        base_time = df_val['ds'].iloc[base_idx]
        future_times = pd.date_range(start=base_time, periods=args.pred_len, freq=args.detail_freq)
        val_timestamps.extend(future_times)
val_timestamps = val_timestamps[:len(val_preds_flat)]

test_timestamps = []
test_limit = len(df_test) - args.label_len - args.seq_len - 1
for i in range(min(len(preds), test_limit)):
    base_idx = i + args.seq_len + args.label_len - 1
    if base_idx < len(df_test):
        base_time = df_test['ds'].iloc[base_idx]
        future_times = pd.date_range(start=base_time, periods=args.pred_len, freq=args.detail_freq)
        test_timestamps.extend(future_times)
test_timestamps = test_timestamps[:len(test_preds_flat)]

# Final length alignment check
min_val_len = min(len(val_timestamps), len(val_preds_flat), len(val_trues_flat))
min_test_len = min(len(test_timestamps), len(test_preds_flat), len(test_trues_flat))

# Create final output DataFrames
output_val = pd.DataFrame({
    'ds': val_timestamps[:min_val_len],
    'y_true': val_trues_flat[:min_val_len],
    'y_pred': val_preds_flat[:min_val_len]
})

output_test = pd.DataFrame({
    'ds': test_timestamps[:min_test_len],
    'y_true': test_trues_flat[:min_test_len],
    'y_pred': test_preds_flat[:min_test_len]
})


In [48]:
output_val

Unnamed: 0,ds,y_true,y_pred
0,2025-03-02 04:15:00+00:00,80.666664,110.321518
1,2025-03-02 04:30:00+00:00,80.000000,96.385452
2,2025-03-02 04:45:00+00:00,82.000000,94.864960
3,2025-03-02 05:00:00+00:00,81.666664,93.977966
4,2025-03-02 04:30:00+00:00,80.000000,100.416634
...,...,...,...
1839,2025-03-06 23:45:00+00:00,108.666664,103.267921
1840,2025-03-06 23:15:00+00:00,119.333336,98.398338
1841,2025-03-06 23:30:00+00:00,116.666664,98.177521
1842,2025-03-06 23:45:00+00:00,108.666664,102.819344


In [49]:
output_test

Unnamed: 0,ds,y_true,y_pred
0,2025-03-07 04:15:00+00:00,103.333336,95.579010
1,2025-03-07 04:30:00+00:00,103.000000,95.953041
2,2025-03-07 04:45:00+00:00,104.000000,104.331367
3,2025-03-07 05:00:00+00:00,102.666664,98.199821
4,2025-03-07 04:30:00+00:00,103.000000,100.179825
...,...,...,...
2363,2025-03-13 10:45:00+00:00,123.666664,97.144440
2364,2025-03-13 10:15:00+00:00,111.666664,98.079048
2365,2025-03-13 10:30:00+00:00,122.333336,106.433426
2366,2025-03-13 10:45:00+00:00,123.666664,102.655563
