In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.fft import fft, ifft
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

In [4]:
def ewt_boundaries_equal_energy(spectrum, N):
    energy = np.cumsum(spectrum) / np.sum(spectrum)
    boundaries = []
    for k in range(1, N):
        idx = np.argmin(np.abs(energy - k / N))
        boundaries.append(idx / len(spectrum) * np.pi)
    return boundaries

def ewt_boundaries(spectrum, N, smooth_sigma=2):
    spectrum_smooth = gaussian_filter1d(spectrum, sigma=smooth_sigma)
    if np.allclose(spectrum_smooth, spectrum_smooth[0]):
        return ewt_boundaries_equal_energy(spectrum, N)

    peaks, _ = find_peaks(spectrum_smooth)
    if len(peaks) < N - 1:
        return ewt_boundaries_equal_energy(spectrum, N)

    amps = spectrum_smooth[peaks]
    top_peaks = sorted([p for _, p in sorted(zip(amps, peaks), reverse=True)][:N-1])
    boundaries = [p / len(spectrum) * np.pi for p in top_peaks]
    return boundaries

def make_filter_bank(boundaries, L):
    freqs = np.linspace(0, np.pi, L//2 + 1)
    mfb = []
    phi = np.zeros_like(freqs)
    phi[freqs <= boundaries[0]] = 1
    mfb.append(phi)
    for i in range(len(boundaries)):
        psi = np.zeros_like(freqs)
        if i == len(boundaries) - 1:
            mask = (freqs > boundaries[i])
        else:
            mask = (freqs > boundaries[i]) & (freqs <= boundaries[i+1])
        psi[mask] = 1
        mfb.append(psi)
    return mfb

def EWT1D(signal, N=3, smooth_sigma=2):
    L = len(signal)
    spectrum_half = np.abs(fft(signal))[:L//2 + 1]
    boundaries = ewt_boundaries(spectrum_half, N, smooth_sigma=smooth_sigma)
    mfb = make_filter_bank(boundaries, L)

    modes = []
    S_full = fft(signal)
    for filt in mfb:
        filt_full = np.concatenate([filt, filt[-2:0:-1]])
        mode_freq = S_full * filt_full
        mode_time = np.real(ifft(mode_freq))
        modes.append(mode_time)
    return np.array(modes), boundaries

In [5]:
class RELM:
    def __init__(self, n_hidden=100, activation='tanh', C=1.0, random_state=None):
        self.n_hidden = int(n_hidden)
        self.activation = activation
        self.C = float(C)
        self.random_state = random_state
        self.is_fitted = False

    def _init_weights(self, n_features):
        rng = np.random.default_rng(self.random_state)
        self.W = rng.uniform(-1, 1, size=(self.n_hidden, n_features))
        self.b = rng.uniform(-1, 1, size=(self.n_hidden,))

    def _activation(self, X):
        if self.activation == 'sigmoid':
            X = np.clip(X, -500, 500)
            return 1.0 / (1.0 + np.exp(-X))
        if self.activation == 'tanh':
            return np.tanh(X)
        if self.activation == 'relu':
            return np.maximum(0.0, X)
        raise ValueError(f"Unknown activation: {self.activation}")

    def fit(self, X, y):
        X, y = np.asarray(X), np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        N, d = X.shape
        self._init_weights(d)
        H = self._activation(X @ self.W.T + self.b)

        if N >= self.n_hidden:
            A = (np.eye(self.n_hidden) / self.C) + (H.T @ H)
            B = H.T @ y
            self.beta = np.linalg.solve(A, B)
        else:
            A = (np.eye(N) / self.C) + (H @ H.T)
            B = y
            self.beta = H.T @ np.linalg.solve(A, B)
        self.is_fitted = True
        return self

    def predict(self, X):
        H = self._activation(np.asarray(X) @ self.W.T + self.b)
        Y = H @ self.beta
        return Y.ravel() if Y.shape[1] == 1 else Y

In [6]:
def create_multivariate_lagged_dataset(df, target_col, feature_cols, lag=3):
    data = df[feature_cols].values
    target_idx = feature_cols.index(target_col)
    X, y = [], []
    for i in range(lag, len(df)):
        X.append(data[i-lag:i].flatten())
        y.append(data[i, target_idx])
    return np.array(X), np.array(y)

def safe_mape(y_true, y_pred, min_denom=1.0):
    mask = np.abs(y_true) >= min_denom
    if np.sum(mask) == 0:
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def sde(y_true, y_pred):
    return float(np.std(np.array(y_true) - np.array(y_pred)))

In [7]:
class GWO:
    def __init__(self, obj_func, lb, ub, dim, n_agents=12, n_iter=25, seed=42):
        self.obj_func = obj_func
        self.lb = np.array(lb)
        self.ub = np.array(ub)
        self.dim = dim
        self.n_agents = n_agents
        self.n_iter = n_iter
        self.rng = np.random.default_rng(seed)

    def optimize(self):
        wolves = self.rng.uniform(self.lb, self.ub, size=(self.n_agents, self.dim))
        fitness = np.array([self.obj_func(w) for w in wolves])
        idx = np.argsort(fitness)
        alpha, beta, delta = wolves[idx[0]], wolves[idx[1]], wolves[idx[2]]
        f_alpha, f_beta, f_delta = fitness[idx[0]], fitness[idx[1]], fitness[idx[2]]

        for t in range(self.n_iter):
            a = 2 - 2 * (t / (self.n_iter - 1 + 1e-9))
            for i in range(self.n_agents):
                X = wolves[i].copy()
                for j in range(self.dim):
                    r1, r2 = self.rng.random(), self.rng.random()
                    A1 = 2 * a * r1 - a
                    C1 = 2 * r2
                    D_alpha = abs(C1 * alpha[j] - X[j])
                    X1 = alpha[j] - A1 * D_alpha

                    r1, r2 = self.rng.random(), self.rng.random()
                    A2 = 2 * a * r1 - a
                    C2 = 2 * r2
                    D_beta = abs(C2 * beta[j] - X[j])
                    X2 = beta[j] - A2 * D_beta

                    r1, r2 = self.rng.random(), self.rng.random()
                    A3 = 2 * a * r1 - a
                    C3 = 2 * r2
                    D_delta = abs(C3 * delta[j] - X[j])
                    X3 = delta[j] - A3 * D_delta

                    X[j] = (X1 + X2 + X3) / 3.0

                wolves[i] = np.clip(X, self.lb, self.ub)

            fitness = np.array([self.obj_func(w) for w in wolves])
            idx = np.argsort(fitness)
            if fitness[idx[0]] < f_alpha:
                alpha, f_alpha = wolves[idx[0]], fitness[idx[0]]
            if fitness[idx[1]] < f_beta:
                beta, f_beta = wolves[idx[1]], fitness[idx[1]]
            if fitness[idx[2]] < f_delta:
                delta, f_delta = wolves[idx[2]], fitness[idx[2]]

        return alpha, f_alpha

In [8]:
def decode_relm_params(position, Hmin=20, Hmax=500, Cmin_log=-4, Cmax_log=4):
    h_raw, c_log, a_raw = position
    n_hidden = int(np.round(Hmin + h_raw * (Hmax - Hmin)))
    C = 10.0 ** float(c_log)
    activation = ['tanh', 'sigmoid', 'relu'][int(np.round(np.clip(a_raw, 0, 2)))]
    return n_hidden, C, activation

def make_relm_objective(X_train, y_train, X_val, y_val, random_state=42):
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_val_s = scaler.transform(X_val)

    def objective(position):
        n_hidden, C, activation = decode_relm_params(position)
        try:
            model = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=random_state)
            model.fit(X_train_s, y_train)
            y_pred = model.predict(X_val_s)
            return np.sqrt(mean_squared_error(y_val, y_pred))
        except:
            return 1e6
    return objective

In [10]:
def ewt_gwo_relm_no_iewt_multistep(
    df,
    target_column,
    feature_columns,
    lag_steps=12,
    n_modes=4,
    gwo_agents=12,
    gwo_iters=25,
    random_state=42,
    Hmin=20, Hmax=500,
    Cmin_log=-4, Cmax_log=4,
    smooth_sigma=2,
    max_step_eval=7
):
    """
    Returns:
      - summary_df: DataFrame with per-mode one-step metrics and best hyperparams
      - multistep_df: DataFrame rows [Mode, Step, MAE, RMSE, MAPE (%), SDE]
    """
    signal = df[target_column].values
    modes, boundaries = EWT1D(signal, N=n_modes, smooth_sigma=smooth_sigma)

    n_samples = len(signal) - lag_steps
    if n_samples <= 0:
        raise ValueError("lag_steps too large for series length.")
    train_end = int(0.7 * n_samples)
    val_end = int(0.85 * n_samples)

    summary_rows = []
    multistep_rows = []

    for mode_idx in range(n_modes):
        mode_series = modes[mode_idx]
        df_mode = df.copy()
        df_mode[target_column] = mode_series

        X_mode, y_mode = create_multivariate_lagged_dataset(df_mode, target_column, feature_columns, lag=lag_steps)
        X_train, y_train = X_mode[:train_end], y_mode[:train_end]
        X_val, y_val     = X_mode[train_end:val_end], y_mode[train_end:val_end]
        X_test, y_test   = X_mode[val_end:], y_mode[val_end:]

        if len(X_train) < 5 or len(X_val) < 1 or len(X_test) < 1:
            # Not enough data for this split; skip
            print(f"[Mode {mode_idx+1}] Not enough samples for required splits — skipping mode.")
            continue

        # GWO objective (tune on train->val)
        obj = make_relm_objective(X_train, y_train, X_val, y_val, random_state=random_state)
        lb = np.array([0.0, Cmin_log, 0.0], dtype=float)
        ub = np.array([1.0, Cmax_log, 2.0], dtype=float)
        gwo = GWO(obj, lb, ub, dim=3, n_agents=gwo_agents, n_iter=gwo_iters, seed=random_state + mode_idx)
        best_pos, best_fit = gwo.optimize()
        n_hidden, C, activation = decode_relm_params(best_pos, Hmin=Hmin, Hmax=Hmax, Cmin_log=Cmin_log, Cmax_log=Cmax_log)

        # Train final model on train+val for one-step test evaluation
        scaler = StandardScaler()
        X_trval = np.vstack([X_train, X_val])
        y_trval = np.concatenate([y_train, y_val])
        X_trval_s = scaler.fit_transform(X_trval)
        X_test_s = scaler.transform(X_test)

        model_final = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=random_state)
        model_final.fit(X_trval_s, y_trval)
        y_pred_test = model_final.predict(X_test_s)

        mae = mean_absolute_error(y_test, y_pred_test)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
        mape = safe_mape(y_test, y_pred_test)
        sdev = sde(y_test, y_pred_test)

        summary_rows.append({
            "Mode": mode_idx + 1,
            "n_hidden": n_hidden,
            "C": C,
            "activation": activation,
            "val_rmse_best": float(best_fit),
            "MAE": float(mae),
            "RMSE": float(rmse),
            "MAPE (%)": float(mape) if not np.isnan(mape) else np.nan,
            "SDE": float(sdev)
        })

        # Multi-step direct forecasts (for steps 1..max_step_eval)
        # For fair direct predictions, retrain model on TRAIN only (not train+val)
        scaler_train = StandardScaler()
        X_train_s = scaler_train.fit_transform(X_train)

        for step in range(1, max_step_eval + 1):
            # Inputs that can predict t+step are X_test[:-step], targets are y_test[step:]
            if X_test.shape[0] <= step:
                # Not enough test samples for this step; skip
                continue
            X_test_step = X_test[:-step]
            y_test_step = y_test[step:]

            # Fit model on train only with tuned params
            model_step = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=random_state)
            model_step.fit(X_train_s, y_train)

            # Scale X_test_step with scaler_train
            X_test_step_s = scaler_train.transform(X_test_step)
            y_pred_step = model_step.predict(X_test_step_s)

            mae_s = mean_absolute_error(y_test_step, y_pred_step)
            rmse_s = np.sqrt(mean_squared_error(y_test_step, y_pred_step))
            mape_s = safe_mape(y_test_step, y_pred_step)
            sdev_s = sde(y_test_step, y_pred_step)

            multistep_rows.append({
                "Mode": mode_idx + 1,
                "Step": step,
                "MAE": float(mae_s),
                "RMSE": float(rmse_s),
                "MAPE (%)": float(mape_s) if not np.isnan(mape_s) else np.nan,
                "SDE": float(sdev_s)
            })

    summary_df = pd.DataFrame(summary_rows).sort_values("Mode").reset_index(drop=True)
    multistep_df = pd.DataFrame(multistep_rows).sort_values(["Mode", "Step"]).reset_index(drop=True)

    return summary_df, multistep_df, boundaries

In [11]:
feature_columns = ['AirTemp','Azimuth','CloudOpacity','DewpointTemp','Dhi','Dni','Ebh',
                   'WindDirection10m','Ghi','RelativeHumidity','SurfacePressure','WindSpeed10m']
df = pd.read_csv('/Users/hrishityelchuri/Documents/windPred/raw/8.52 hrishit data.csv')

df['PeriodEnd'] = pd.to_datetime(df['PeriodEnd'])
df['PeriodStart'] = pd.to_datetime(df['PeriodStart'])
df = df.sort_values('PeriodEnd')

In [12]:
summary_df, multistep_df, boundaries = ewt_gwo_relm_no_iewt_multistep(
    df,
    target_column='WindSpeed10m',
    feature_columns=feature_columns,
    lag_steps=12,
    n_modes=4,
    gwo_agents=12,
    gwo_iters=25,
    random_state=42,
    Hmin=20, Hmax=500,
    Cmin_log=-4, Cmax_log=4,
    smooth_sigma=2,
    max_step_eval=7
)
print("Per-mode one-step summary:")
print(summary_df.to_string(index=False))
print("\nPer-mode multi-step (direct) metrics:")
print(multistep_df.to_string(index=False))

Per-mode one-step summary:
 Mode  n_hidden        C activation  val_rmse_best      MAE     RMSE  MAPE (%)      SDE
    1       300 0.023629    sigmoid       0.281201 0.154500 0.201430  6.450085 0.194043
    2       108 1.348989    sigmoid       0.048985 0.217639 0.287263 20.694048 0.272769
    3       331 0.024686       relu       0.174683 0.182196 0.231931 12.879121 0.231930
    4        44 0.028521    sigmoid       0.243659 0.415023 0.539876 65.157607 0.539293

Per-mode multi-step (direct) metrics:
 Mode  Step      MAE     RMSE   MAPE (%)      SDE
    1     1 0.300395 0.391473  13.300677 0.288986
    1     2 0.300364 0.391472  13.299368 0.289055
    1     3 0.300336 0.391471  13.298135 0.289121
    1     4 0.300307 0.391471  13.296879 0.289186
    1     5 0.300276 0.391470  13.295577 0.289258
    1     6 0.300242 0.391469  13.294200 0.289316
    1     7 0.300215 0.391469  13.292991 0.289383
    2     1 0.267514 0.354597  25.623138 0.321708
    2     2 0.267484 0.354588  25.623859 0.3