In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.fft import fft, ifft, fftfreq
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

In [26]:
df = pd.read_csv('/Users/hrishityelchuri/Documents/windPred/raw/8.52 hrishit data.csv')

In [27]:
df['PeriodEnd'] = pd.to_datetime(df['PeriodEnd'])
df['PeriodStart'] = pd.to_datetime(df['PeriodStart'])

In [28]:
df = df.sort_values('PeriodEnd')

In [29]:
def create_multivariate_lagged_dataset(df, target_col, feature_cols, lag=3):
    data = df[feature_cols].values
    target_idx = feature_cols.index(target_col)
    
    X, y = [], []
    for i in range(lag, len(df)):
        # extract lagged observations for all features
        X.append(data[i-lag:i].flatten())  # flatten to 1D array of length features*lag
        y.append(data[i, target_idx])
    return np.array(X), np.array(y)

In [30]:
def safe_mape(y_true, y_pred, min_denom=1.0):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = np.abs(y_true) >= min_denom
    if np.sum(mask) == 0:
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def sde(y_true, y_pred):
    return float(np.std(np.array(y_true) - np.array(y_pred)))

In [31]:
def ewt_boundaries_equal_energy(spectrum, N):
    """Fallback: Equal-energy segmentation."""
    energy = np.cumsum(spectrum) / np.sum(spectrum)
    boundaries = []
    for k in range(1, N):
        idx = np.argmin(np.abs(energy - k / N))
        boundaries.append(idx / len(spectrum) * np.pi)
    return boundaries

def ewt_boundaries(spectrum, N, smooth_sigma=2):
    """
    Spectrum-peak-based boundary detection from EWT paper.
    If not enough peaks found, falls back to equal-energy method.
    """
    # Smooth spectrum
    spectrum_smooth = gaussian_filter1d(spectrum, sigma=smooth_sigma)

    # Find peaks
    peaks, _ = find_peaks(spectrum_smooth)

    if len(peaks) < N - 1:
        print("[EWT] Warning: Not enough peaks found, falling back to equal-energy boundaries.")
        return ewt_boundaries_equal_energy(spectrum, N)

    # Sort peaks by amplitude
    prominences = spectrum_smooth[peaks]
    sorted_peaks = [p for _, p in sorted(zip(prominences, peaks), reverse=True)]
    
    # Take N-1 most prominent
    sorted_peaks = sorted(sorted_peaks[:N-1])

    # Convert to radian boundaries
    boundaries = [p / len(spectrum) * np.pi for p in sorted_peaks]
    return boundaries

def make_filter_bank(boundaries, L):
    freqs = np.linspace(0, np.pi, L//2+1)
    mfb = []

    # Scaling (low-pass)
    phi = np.zeros_like(freqs)
    phi[freqs <= boundaries[0]] = 1
    mfb.append(phi)

    # Wavelets
    for i in range(len(boundaries)):
        psi = np.zeros_like(freqs)
        if i == len(boundaries) - 1:
            mask = (freqs > boundaries[i])
        else:
            mask = (freqs > boundaries[i]) & (freqs <= boundaries[i+1])
        psi[mask] = 1
        mfb.append(psi)
    return mfb

def EWT1D(signal, N=3, smooth_sigma=2):
    """EWT using spectrum-peak-based boundaries."""
    L = len(signal)
    spectrum = np.abs(fft(signal))[:L//2+1]
    boundaries = ewt_boundaries(spectrum, N, smooth_sigma=smooth_sigma)
    mfb = make_filter_bank(boundaries, L)

    modes = []
    spectrum_full = fft(signal)
    for filt in mfb:
        filt_full = np.concatenate([filt, filt[-2:0:-1]])
        mode_freq = spectrum_full * filt_full
        mode_time = np.real(ifft(mode_freq))
        modes.append(mode_time)
    return np.array(modes), mfb, boundaries

def iEWT1D(modes, mfb):
    """Inverse EWT."""
    return np.sum(modes, axis=0)

In [32]:
class RELM:
    def __init__(self, n_hidden=100, activation='tanh', C=1.0, random_state=None):
        self.n_hidden = int(n_hidden)
        self.activation = activation
        self.C = float(C)
        self.random_state = random_state
        self.is_fitted = False

    def _init_weights(self, n_features):
        rng = np.random.default_rng(self.random_state)
        self.W = rng.uniform(-1, 1, size=(self.n_hidden, n_features))
        self.b = rng.uniform(-1, 1, size=(self.n_hidden,))

    def _activation(self, X):
        if self.activation == 'sigmoid':
            X = np.clip(X, -500, 500)
            return 1.0 / (1.0 + np.exp(-X))
        if self.activation == 'tanh':
            return np.tanh(X)
        if self.activation == 'relu':
            return np.maximum(0.0, X)
        raise ValueError(f"Unknown activation: {self.activation}")

    def fit(self, X, y):
        X, y = np.asarray(X), np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        N, d = X.shape
        self._init_weights(d)
        H = self._activation(X @ self.W.T + self.b)

        if N >= self.n_hidden:
            A = (np.eye(self.n_hidden) / self.C) + (H.T @ H)
            B = H.T @ y
            self.beta = np.linalg.solve(A, B)
        else:
            A = (np.eye(N) / self.C) + (H @ H.T)
            B = y
            self.beta = H.T @ np.linalg.solve(A, B)
        self.is_fitted = True
        return self

    def predict(self, X):
        if not self.is_fitted:
            raise RuntimeError("Model not fitted.")
        H = self._activation(np.asarray(X) @ self.W.T + self.b)
        Y = H @ self.beta
        return Y.ravel() if Y.shape[1] == 1 else Y

In [33]:
def ewt_relm_iewt_pipeline(df, target_column, feature_columns, lag_steps=12, n_modes=4,
                           n_hidden=100, C=1.0, activation='tanh', max_step_eval=7):

    # Step 1: EWT decomposition of target
    signal = df[target_column].values
    ewt_modes, mfb, boundaries = EWT1D(signal, N=n_modes)

    # Split indexes after lag
    n_samples = len(signal) - lag_steps
    train_end = int(0.7 * n_samples)
    val_end = int(0.85 * n_samples)

    # Step 2: Train RELM for each mode
    mode_preds_test = []
    for mode_idx in range(n_modes):
        mode_series = ewt_modes[mode_idx, :]
        df_mode = df.copy()
        df_mode[target_column] = mode_series

        X_mode, y_mode = create_multivariate_lagged_dataset(df_mode, target_column, feature_columns, lag=lag_steps)
        X_train, y_train = X_mode[:train_end], y_mode[:train_end]
        X_test = X_mode[val_end:]

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=42)
        model.fit(X_train_scaled, y_train)

        y_pred_test_mode = model.predict(X_test_scaled)
        mode_preds_test.append(y_pred_test_mode)

    # Step 3: IEWT reconstruction
    mode_preds_test = np.array(mode_preds_test)
    y_pred_final = iEWT1D(mode_preds_test, mfb)

    # Step 4: Metrics (one-step)
    y_true_test = df[target_column].values[lag_steps+val_end:]
    mae = mean_absolute_error(y_true_test, y_pred_final)
    rmse_val = np.sqrt(mean_squared_error(y_true_test, y_pred_final))
    mape_val = safe_mape(y_true_test, y_pred_final)
    sde_val = sde(y_true_test, y_pred_final)

    print("\n=== One-step Test Metrics ===")
    print(f"MAE  : {mae:.6f}")
    print(f"RMSE : {rmse_val:.6f}")
    print(f"MAPE : {mape_val:.3f}%")
    print(f"SDE  : {sde_val:.6f}")

    # Step 5: Multi-step metrics
    multistep_rows = []
    for step in range(1, max_step_eval + 1):
        mode_preds_step = []
        for mode_idx in range(n_modes):
            mode_series = ewt_modes[mode_idx, :]
            df_mode = df.copy()
            df_mode[target_column] = mode_series

            X_mode, y_mode = create_multivariate_lagged_dataset(df_mode, target_column, feature_columns, lag=lag_steps)
            X_train, y_train = X_mode[:train_end], y_mode[:train_end]
            X_test_step = X_mode[val_end:-step]
            y_test_step = y_mode[val_end+step:]

            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test_step)

            model = RELM(n_hidden=n_hidden, activation=activation, C=C, random_state=42)
            model.fit(X_train_scaled, y_train)
            mode_preds_step.append(model.predict(X_test_scaled))

        mode_preds_step = np.array(mode_preds_step)
        y_pred_step_final = iEWT1D(mode_preds_step, mfb)
        y_true_step = df[target_column].values[lag_steps+val_end+step:]

        multistep_rows.append({
            "Step": step,
            "MAE": mean_absolute_error(y_true_step, y_pred_step_final),
            "RMSE": np.sqrt(mean_squared_error(y_true_step, y_pred_step_final)),
            "MAPE (%)": safe_mape(y_true_step, y_pred_step_final),
            "SDE": sde(y_true_step, y_pred_step_final)
        })

    multistep_df = pd.DataFrame(multistep_rows)
    print("\n=== Multi-step Test Metrics ===")
    print(multistep_df.to_string(index=False))

    return mae, rmse_val, mape_val, sde_val, multistep_df


In [34]:
feature_columns = ['AirTemp','Azimuth','CloudOpacity','DewpointTemp','Dhi','Dni','Ebh',
                    'WindDirection10m','Ghi','RelativeHumidity','SurfacePressure','WindSpeed10m']
target_column = 'WindSpeed10m'

In [35]:
ewt_relm_iewt_pipeline(df, target_column, feature_columns,
                       lag_steps=12, n_modes=4,
                       n_hidden=100, C=1.0, activation='tanh',
                       max_step_eval=7)


=== One-step Test Metrics ===
MAE  : 2.889451
RMSE : 3.419612
MAPE : 110.405%
SDE  : 2.094025

=== Multi-step Test Metrics ===
 Step      MAE     RMSE   MAPE (%)      SDE
    1 2.907520 3.463841 110.307476 2.165256
    2 2.927575 3.506699 110.465468 2.233091
    3 2.946537 3.541221 110.364851 2.286986
    4 2.963732 3.567115 110.629683 2.326834
    5 2.976152 3.584364 110.904148 2.353136
    6 2.980362 3.589067 110.728046 2.360247
    7 2.973807 3.577168 110.326572 2.342320


(2.8894509619518502,
 np.float64(3.4196123441891224),
 np.float64(110.40465491962148),
 2.0940245052461046,
    Step       MAE      RMSE    MAPE (%)       SDE
 0     1  2.907520  3.463841  110.307476  2.165256
 1     2  2.927575  3.506699  110.465468  2.233091
 2     3  2.946537  3.541221  110.364851  2.286986
 3     4  2.963732  3.567115  110.629683  2.326834
 4     5  2.976152  3.584364  110.904148  2.353136
 5     6  2.980362  3.589067  110.728046  2.360247
 6     7  2.973807  3.577168  110.326572  2.342320)