In [242]:
import pandas as pd
from sklearn.model_selection import train_test_split
from relm import RELM
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [243]:
df = pd.read_csv('/Users/hrishityelchuri/Documents/windPred/raw/8.52 hrishit data.csv')

In [244]:
df.head(20)

Unnamed: 0,PeriodEnd,PeriodStart,Period,AirTemp,Azimuth,CloudOpacity,DewpointTemp,Dhi,Dni,Ebh,...,GtiFixedTilt,GtiTracking,PrecipitableWater,RelativeHumidity,SnowWater,SurfacePressure,WindDirection10m,WindSpeed10m,Zenith,AlbedoDaily
0,2007-01-01T02:00:00Z,2007-01-01T01:00:00Z,PT60M,22.3,-114,23.0,18.9,33,3,0,...,33,33,22.4,81.2,0.0,999.4,66,3.8,85,0.1
1,2007-01-01T03:00:00Z,2007-01-01T02:00:00Z,PT60M,23.4,-118,13.8,19.2,116,327,122,...,263,440,22.6,77.3,0.0,1000.2,67,3.7,72,0.1
2,2007-01-01T04:00:00Z,2007-01-01T03:00:00Z,PT60M,24.4,-124,0.0,19.2,119,737,379,...,553,813,22.9,72.9,0.0,1000.5,68,3.7,59,0.1
3,2007-01-01T05:00:00Z,2007-01-01T04:00:00Z,PT60M,25.3,-133,0.0,18.9,135,831,561,...,759,879,23.4,68.0,0.0,1000.5,71,3.9,48,0.1
4,2007-01-01T06:00:00Z,2007-01-01T05:00:00Z,PT60M,26.1,-148,4.5,18.7,229,723,564,...,851,858,23.9,63.4,0.0,1000.4,73,4.0,38,0.1
5,2007-01-01T07:00:00Z,2007-01-01T06:00:00Z,PT60M,26.8,-170,7.3,18.5,309,609,514,...,874,829,24.5,60.4,0.0,999.9,75,3.9,33,0.1
6,2007-01-01T08:00:00Z,2007-01-01T07:00:00Z,PT60M,27.2,165,0.0,18.3,204,805,674,...,942,892,25.2,58.6,0.0,999.0,77,3.5,33,0.1
7,2007-01-01T09:00:00Z,2007-01-01T08:00:00Z,PT60M,27.5,145,0.4,18.2,209,747,578,...,847,863,25.9,56.9,0.0,998.0,79,3.1,40,0.1
8,2007-01-01T10:00:00Z,2007-01-01T09:00:00Z,PT60M,27.7,131,2.8,18.4,248,546,355,...,648,755,26.9,56.7,0.0,997.4,83,2.7,50,0.1
9,2007-01-01T11:00:00Z,2007-01-01T10:00:00Z,PT60M,27.8,122,2.7,18.8,195,434,207,...,437,627,28.2,58.1,0.0,997.2,90,2.4,61,0.1


In [245]:
df['PeriodEnd'] = pd.to_datetime(df['PeriodEnd'])

In [246]:
df['PeriodStart'] = pd.to_datetime(df['PeriodStart'])

In [247]:
df = df.sort_values('PeriodEnd')

In [248]:
df['WindSpeed10m']

0         3.8
1         3.7
2         3.7
3         3.9
4         4.0
         ... 
137417    2.0
137418    1.7
137419    1.8
137420    1.7
137421    1.4
Name: WindSpeed10m, Length: 137422, dtype: float64

In [249]:
def create_multivariate_lagged_dataset(df, target_col, feature_cols, lag=3):
    """
    Create supervised learning data from multivariate time series.
    
    Parameters:
    - df: pandas DataFrame containing time series data for multiple variables
    - target_col: string, name of the target variable column (e.g., 'WindSpeed10m')
    - feature_cols: list of strings, names of feature columns to use (including target if desired)
    - lag: number of past time steps to include as input features
    
    Returns:
    - X: 2D NumPy array of shape (samples, features * lag)
    - y: 1D NumPy array of target values (samples,)
    """
    data = df[feature_cols].values
    target_idx = feature_cols.index(target_col)
    
    X, y = [], []
    for i in range(lag, len(df)):
        # extract lagged observations for all features
        X.append(data[i-lag:i].flatten())  # flatten to 1D array of length features*lag
        y.append(data[i, target_idx])
    return np.array(X), np.array(y)

# Set columns you want to use as features
feature_columns = ['AirTemp','Azimuth','CloudOpacity','DewpointTemp','Dhi','Dni','Ebh','WindDirection10m','Ghi','RelativeHumidity','SurfacePressure','WindSpeed10m']
target_column = 'WindSpeed10m'
lag_steps = 3  # number of past hours to use

X, y = create_multivariate_lagged_dataset(df, target_column, feature_columns, lag=lag_steps)

# Now X.shape will be (num_samples, lag_steps * num_features)
# y.shape will be (num_samples,)


In [250]:
wind_series = df['WindSpeed10m']
print("Min wind speed in test set:", wind_series.min())
print("Values < 0.5:", np.sum(wind_series < 0.5))

Min wind speed in test set: 0.0
Values < 0.5: 1572


In [251]:
n_samples = X.shape[0]
train_end = int(0.7 * n_samples)
val_end = int(0.85 * n_samples)

X_train, y_train = X[:train_end], y[:train_end]
X_val, y_val = X[train_end:val_end], y[train_end:val_end]
X_test, y_test = X[val_end:], y[val_end:]

In [252]:
X_train.shape

(96193, 36)

In [253]:
X_train

array([[  22.3, -114. ,   23. , ...,   72.9, 1000.5,    3.7],
       [  23.4, -118. ,   13.8, ...,   68. , 1000.5,    3.9],
       [  24.4, -124. ,    0. , ...,   63.4, 1000.4,    4. ],
       ...,
       [  23.9, -112. ,    0. , ...,   83.8,  997.8,    4.1],
       [  24.2, -113. ,    0. , ...,   83. ,  998.5,    4.5],
       [  24.5, -115. ,    9.4, ...,   79.9,  998.8,    4.7]],
      shape=(96193, 36))

In [254]:
print("Min wind speed in test set:", y_test.min())
print("Values < 0.1:", np.sum(y_test < 0.1))

Min wind speed in test set: 0.0
Values < 0.1: 6


In [255]:
class RELM:
    """
    Regularized Extreme Learning Machine (RELM)
    Supports: 'sigmoid', 'tanh', 'relu', 'rbf' activations.
    """
    def __init__(self, n_hidden=100, activation='sigmoid', C=1.0, random_state=None, rbf_sigma=None):
        self.n_hidden = int(n_hidden)
        self.activation = activation
        self.C = float(C)
        self.random_state = random_state
        self.rbf_sigma = rbf_sigma
        self.is_fitted = False

    def _init_weights(self, n_features):
        rng = np.random.default_rng(self.random_state)
        self.W = rng.normal(size=(self.n_hidden, n_features))
        self.b = rng.normal(size=(self.n_hidden,))
        if self.activation == 'rbf':
            self.W = None
            self.b = None

    def _activation(self, X):
        if self.activation == 'sigmoid':
            X = np.clip(X, -500, 500)
            return 1.0 / (1.0 + np.exp(-X))
        if self.activation == 'tanh':
            return np.tanh(X)
        if self.activation == 'relu':
            return np.maximum(0.0, X)
        if self.activation == 'rbf':
            if self.rbf_sigma is None:
                raise ValueError("rbf_sigma must be provided for RBF activation.")
            return np.exp(- X / (2.0 * (self.rbf_sigma ** 2)))
        raise ValueError(f"Unknown activation: {self.activation}")

    def fit(self, X, y):
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        n_samples, n_features = X.shape

        if self.activation == 'rbf':
            rng = np.random.default_rng(self.random_state)
            indices = rng.choice(n_samples, size=self.n_hidden, replace=False)
            self.centers = X[indices]
            if self.rbf_sigma is None:
                from scipy.spatial.distance import pdist
                if self.n_hidden > 1:
                    d = pdist(self.centers, metric='euclidean')
                    self.rbf_sigma = np.median(d) if d.size > 0 else 1.0
                else:
                    self.rbf_sigma = 1.0
            XC = X[:, None, :] - self.centers[None, :, :]
            sqdist = np.sum(XC**2, axis=2)
            H = self._activation(sqdist)
        else:
            self._init_weights(n_features)
            Z = X @ self.W.T + self.b
            H = self._activation(Z)

        HT = H.T
        A = HT @ H
        reg = (1.0 / self.C) * np.eye(A.shape[0])
        A_reg = A + reg
        RHS = HT @ y
        self.beta = np.linalg.solve(A_reg, RHS)
        self.is_fitted = True
        return self

    def predict(self, X):
        if not self.is_fitted:
            raise RuntimeError("Model not fitted.")
        X = np.asarray(X)
        if self.activation == 'rbf':
            XC = X[:, None, :] - self.centers[None, :, :]
            sqdist = np.sum(XC**2, axis=2)
            H = self._activation(sqdist)
        else:
            Z = X @ self.W.T + self.b
            H = self._activation(Z)
        Ypred = H @ self.beta
        return Ypred.ravel() if Ypred.shape[1] == 1 else Ypred

    def rmse(self, X, y):
        return np.sqrt(mean_squared_error(y, self.predict(X)))

In [256]:
def create_multistep_targets(X, y, max_step=7):
    targets = []
    for step in range(1, max_step + 1):
        # For step-ahead, shift y by 'step' positions forward
        target = y[step:]
        # Make X and target same length
        X_valid = X[:-step]  # last 'step' samples get dropped
        targets.append((X_valid, target))
    return targets


In [257]:
model = RELM(n_hidden=100, C=1.0, activation='sigmoid')

In [258]:
model.fit(X_train, y_train)

<__main__.RELM at 0x163b46a50>

In [259]:
y_pred_val = model.predict(X_val)
y_pred_test = model.predict(X_test)

In [260]:
def safe_mape(y_true, y_pred, min_denom=1.0):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = np.abs(y_true) >= min_denom
    if np.sum(mask) == 0:
        return np.nan
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

In [261]:
def sde(y_true, y_pred):
    return np.std(y_true - y_pred)

In [262]:
max_step = 7

In [263]:
multistep_data = create_multistep_targets(X_test, y_test, max_step=max_step)

In [264]:
results = []

for step, (step_X, step_y) in enumerate(multistep_data, 1):
    # Predict for step-ahead
    y_pred_step = model.predict(step_X)
    
    mae = mean_absolute_error(step_y, y_pred_step)
    rmse = np.sqrt(mean_squared_error(step_y, y_pred_step))
    mape_val = safe_mape(step_y, y_pred_step)
    sde_val = sde(step_y, y_pred_step)
    
    results.append({
        "Step": step,
        "MAE": mae,
        "RMSE": rmse,
        "MAPE (%)": mape_val,
        "SDE": sde_val
    })

In [265]:
results_df = pd.DataFrame(results)
print(results_df)

   Step       MAE      RMSE   MAPE (%)       SDE
0     1  1.462659  1.750006  64.631670  1.306664
1     2  1.458132  1.748100  64.760887  1.304132
2     3  1.456090  1.749890  64.829068  1.306529
3     4  1.457942  1.754124  64.984648  1.312181
4     5  1.461666  1.759707  65.239470  1.319618
5     6  1.466795  1.766285  65.735424  1.328341
6     7  1.474170  1.774497  66.401474  1.339195
