In [27]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.signal import convolve

# ============================================
# 1. db4 filter coefficients (analysis & synthesis)
# ============================================
db4_lo = np.array([
    -0.010597401785, 0.032883011667, 0.030841381836,
    -0.187034811719, -0.027983769417, 0.630880767930,
    0.714846570553, 0.230377813309
])
db4_hi = np.array([
    -0.230377813309, 0.714846570553, -0.630880767930,
    -0.027983769417, 0.187034811719, 0.030841381836,
    -0.032883011667, -0.010597401785
])

g0 = db4_lo[::-1]
g1 = db4_hi[::-1]

# ============================================
# 2. Convolution with symmetric extension
# ============================================
def conv_reflect_same(x, filt):
    L = len(filt)
    ext = L // 2
    x_ext = np.pad(x, (ext, ext-1), mode='symmetric')
    y = convolve(x_ext, filt, mode='valid')
    return y

def downsample2(x):
    return x[::2]

def upsample2(x):
    up = np.zeros(len(x) * 2)
    up[::2] = x
    return up

# ============================================
# 3. DWT decomposition
# ============================================
def wt_db4_decompose(x, level=3):
    cA = x.copy()
    cD_list = []
    for _ in range(level):
        a = conv_reflect_same(cA, db4_lo)
        d = conv_reflect_same(cA, db4_hi)
        cA = downsample2(a)
        cD = downsample2(d)
        cD_list.append(cD)
    return cA, cD_list  # cA at final level, list of cD's (level order)

# ============================================
# 4. Inverse reconstruction (one component at a time)
# ============================================
def inverse_from_coeffs(cA_L, cD_list, keep='A', keep_index=0):
    cA_current = cA_L if keep == 'A' and keep_index == 0 else np.zeros_like(cA_L)
    for k in range(len(cD_list)):
        cD_current = cD_list[k] if (keep == 'D' and keep_index == k) else np.zeros_like(cD_list[k])
        a_up = upsample2(cA_current)
        d_up = upsample2(cD_current)
        a_rec = conv_reflect_same(a_up, g0)
        d_rec = conv_reflect_same(d_up, g1)
        # 🔹 Fix: match lengths
        min_len = min(len(a_rec), len(d_rec))
        a_rec = a_rec[:min_len]
        d_rec = d_rec[:min_len]
        cA_current = a_rec + d_rec
    return cA_current

# ============================================
# 5. RELM model
# ============================================
class RELM:
    def __init__(self, n_hidden=100, activation='tanh', C=1.0, random_state=None):
        self.n_hidden = int(n_hidden)
        self.activation = activation
        self.C = float(C)
        self.random_state = random_state
        self.is_fitted = False

    def _init_weights(self, n_features):
        rng = np.random.default_rng(self.random_state)
        self.W = rng.uniform(-1, 1, size=(self.n_hidden, n_features))
        self.b = rng.uniform(-1, 1, size=(self.n_hidden,))

    def _activation(self, X):
        if self.activation == 'sigmoid':
            X = np.clip(X, -500, 500)
            return 1.0 / (1.0 + np.exp(-X))
        if self.activation == 'tanh':
            return np.tanh(X)
        if self.activation == 'relu':
            return np.maximum(0.0, X)
        raise ValueError(f"Unknown activation: {self.activation}")

    def fit(self, X, y):
        X, y = np.asarray(X), np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        N, d = X.shape
        self._init_weights(d)
        H = self._activation(X @ self.W.T + self.b)
        if N >= self.n_hidden:
            A = (np.eye(self.n_hidden) / self.C) + (H.T @ H)
            B = H.T @ y
            self.beta = np.linalg.solve(A, B)
        else:
            A = (np.eye(N) / self.C) + (H @ H.T)
            B = y
            self.beta = H.T @ np.linalg.solve(A, B)
        self.is_fitted = True
        return self

    def predict(self, X):
        H = self._activation(np.asarray(X) @ self.W.T + self.b)
        Y = H @ self.beta
        return Y.ravel() if Y.shape[1] == 1 else Y

# ============================================
# 6. GWO optimizer
# ============================================
class GWO:
    def __init__(self, obj_func, lb, ub, dim, n_agents=12, n_iter=25, seed=42):
        self.obj_func = obj_func
        self.lb = np.array(lb)
        self.ub = np.array(ub)
        self.dim = dim
        self.n_agents = n_agents
        self.n_iter = n_iter
        self.rng = np.random.default_rng(seed)

    def optimize(self):
        wolves = self.rng.uniform(self.lb, self.ub, size=(self.n_agents, self.dim))
        fitness = np.array([self.obj_func(w) for w in wolves])
        idx = np.argsort(fitness)
        alpha, beta, delta = wolves[idx[0]], wolves[idx[1]], wolves[idx[2]]
        f_alpha, f_beta, f_delta = fitness[idx[0]], fitness[idx[1]], fitness[idx[2]]

        for t in range(self.n_iter):
            a = 2 - 2 * (t / (self.n_iter - 1 + 1e-9))
            for i in range(self.n_agents):
                X = wolves[i].copy()
                for j in range(self.dim):
                    for leader in [alpha, beta, delta]:
                        r1, r2 = self.rng.random(), self.rng.random()
                        A = 2 * a * r1 - a
                        C = 2 * r2
                        D = abs(C * leader[j] - X[j])
                        X[j] = leader[j] - A * D
                wolves[i] = np.clip(X / 3.0, self.lb, self.ub)
            fitness = np.array([self.obj_func(w) for w in wolves])
            idx = np.argsort(fitness)
            alpha, beta, delta = wolves[idx[0]], wolves[idx[1]], wolves[idx[2]]
            f_alpha, f_beta, f_delta = fitness[idx[0]], fitness[idx[1]], fitness[idx[2]]
        return alpha, f_alpha

# ============================================
# 7. Helper functions
# ============================================
def decode_relm_params(position, Hmin=20, Hmax=500):
    h_raw, c_log, a_raw = position
    n_hidden = int(np.round(Hmin + h_raw * (Hmax - Hmin)))
    C = 10.0 ** float(c_log)
    activation = ['tanh', 'sigmoid', 'relu'][int(np.round(np.clip(a_raw, 0, 2)))]
    return n_hidden, C, activation

def safe_mape(y_true, y_pred, min_denom=1.0):
    mask = np.abs(y_true) >= min_denom
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100 if np.sum(mask) else np.nan

def sde(y_true, y_pred):
    return np.std(y_true - y_pred)

def create_multivariate_lagged_dataset(df, target_col, feature_cols, lag=3):
    data = df[feature_cols].values
    target_idx = feature_cols.index(target_col)
    X, y = [], []
    for i in range(lag, len(df)):
        X.append(data[i-lag:i].flatten())
        y.append(data[i, target_idx])
    return np.array(X), np.array(y)

def create_multistep_targets(X, y, max_step=7):
    targets = []
    for step in range(1, max_step + 1):
        targets.append((X[:-step], y[step:]))
    return targets

# ============================================
# 8. Main WT–GWO–RELM pipeline
# ============================================
def wt_gwo_relm_pipeline_scipy_db4(df, target_column, feature_columns, lag_steps=12, level=3,
                                   gwo_agents=12, gwo_iters=25, random_state=42,
                                   Hmin=20, Hmax=400, max_step_eval=7):

    signal = df[target_column].values
    cA_L, cD_list = wt_db4_decompose(signal, level=level)

    components = [inverse_from_coeffs(cA_L, cD_list, keep='A', keep_index=0)]
    for k in range(len(cD_list)):
        components.append(inverse_from_coeffs(cA_L, cD_list, keep='D', keep_index=k))

    comp_labels = ['A_L'] + [f'D_{level - i}' for i in range(len(cD_list))]
    results_per_comp, multi_results_per_comp = [], {}

    for comp_idx, comp_signal in enumerate(components):
        # 🔹 Fix: align lengths before assignment
        min_len = min(len(df), len(comp_signal))
        df_comp = df.iloc[:min_len].copy()
        df_comp[target_column] = comp_signal[:min_len]

        X, y = create_multivariate_lagged_dataset(df_comp, target_column, feature_columns, lag=lag_steps)

        n_samples = X.shape[0]
        train_end = int(0.7 * n_samples)
        val_end = int(0.85 * n_samples)

        X_train, y_train = X[:train_end], y[:train_end]
        X_val, y_val = X[train_end:val_end], y[train_end:val_end]
        X_test, y_test = X[val_end:], y[val_end:]

        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_val_s = scaler.transform(X_val)

        def obj_func(position):
            n_hidden, C, activation = decode_relm_params(position, Hmin, Hmax)
            try:
                model = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=random_state)
                model.fit(X_train_s, y_train)
                y_pred = model.predict(X_val_s)
                return np.sqrt(mean_squared_error(y_val, y_pred))
            except:
                return 1e6

        lb = np.array([0.0, -4, 0.0])
        ub = np.array([1.0, 4, 2.0])
        gwo = GWO(obj_func, lb, ub, dim=3, n_agents=gwo_agents, n_iter=gwo_iters, seed=random_state+comp_idx)
        best_pos, _ = gwo.optimize()
        n_hidden, C, activation = decode_relm_params(best_pos, Hmin, Hmax)

        X_trval = np.vstack([X_train, X_val])
        y_trval = np.concatenate([y_train, y_val])
        X_trval_s = scaler.fit_transform(X_trval)
        X_test_s = scaler.transform(X_test)

        model = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=random_state)
        model.fit(X_trval_s, y_trval)
        y_pred_test = model.predict(X_test_s)

        results_per_comp.append({
            "Component": comp_labels[comp_idx],
            "n_hidden": n_hidden, "C": C, "activation": activation,
            "MAE": mean_absolute_error(y_test, y_pred_test),
            "RMSE": np.sqrt(mean_squared_error(y_test, y_pred_test)),
            "MAPE (%)": safe_mape(y_test, y_pred_test),
            "SDE": sde(y_test, y_pred_test)
        })

        # Multi-step metrics
        multi_metrics = []
        multistep_data = create_multistep_targets(X_test, y_test, max_step=max_step_eval)
        for step, (step_X, step_y) in enumerate(multistep_data, 1):
            step_X_s = scaler.transform(step_X)
            y_pred_step = model.predict(step_X_s)
            multi_metrics.append({
                "Step": step,
                "MAE": mean_absolute_error(step_y, y_pred_step),
                "RMSE": np.sqrt(mean_squared_error(step_y, y_pred_step)),
                "MAPE (%)": safe_mape(step_y, y_pred_step),
                "SDE": sde(step_y, y_pred_step)
            })
        multi_results_per_comp[comp_labels[comp_idx]] = multi_metrics

    return {"per_comp_info": results_per_comp, "multistep_per_comp": multi_results_per_comp}


In [28]:
res = wt_gwo_relm_pipeline_scipy_db4(
    df,
    target_column="WindSpeed10m",
    feature_columns=[
        'AirTemp','Azimuth','CloudOpacity','DewpointTemp','Dhi','Dni',
        'Ebh','WindDirection10m','Ghi','RelativeHumidity','SurfacePressure','WindSpeed10m'
    ],
    lag_steps=12,
    level=3,
    gwo_agents=8,
    gwo_iters=10,
    random_state=42,
    Hmin=20, Hmax=200,
    max_step_eval=5
)


In [29]:
print("\n=== One-step metrics per component ===")
print(pd.DataFrame(res['per_comp_info']))

print("\n=== Multi-step metrics for first component ===")
comp_name = list(res['multistep_per_comp'].keys())[0]
print(f"Component: {comp_name}")
print(pd.DataFrame(res['multistep_per_comp'][comp_name]))


=== One-step metrics per component ===
  Component  n_hidden         C activation       MAE      RMSE   MAPE (%)  \
0       A_L        20  1.000019       tanh  2.743945  3.234403  64.471782   
1       D_3        20  0.999995       tanh  0.012201  0.016758        NaN   
2       D_2        20  1.000006       tanh  0.078250  0.103246        NaN   
3       D_1        20  1.000000       tanh  0.570033  0.743707  72.518702   

        SDE  
0  2.097229  
1  0.016752  
2  0.103034  
3  0.743383  

=== Multi-step metrics for first component ===
Component: A_L
   Step       MAE      RMSE   MAPE (%)       SDE
0     1  2.743480  3.234232  64.524750  2.096447
1     2  2.743462  3.234464  64.273713  2.096146
2     3  2.744094  3.236095  64.222175  2.097480
3     4  2.745721  3.239437  64.113392  2.101649
4     5  2.747975  3.244127  64.204394  2.107869
