In [None]:
# pip install hpelm


ELM for Horizon 1

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
from hpelm import ELM
import warnings

np.random.seed(42)
warnings.filterwarnings('ignore')

def load_and_preprocess_data(filepath, target_col='fenologia_h1'):
    """Load a phenology CSV and build the horizon-1 feature table.

    Args:
        filepath: Path of a CSV file holding a 'TIME' column and the target.
        target_col: Name of the target column (defaults to the horizon-1 label).

    Returns:
        Tuple ``(df, target_col)``. ``df`` is indexed by the parsed 'TIME'
        values, every column is numeric, the target is shifted down by one,
        and remaining NaNs are replaced by the column median.

    Raises:
        KeyError: If 'TIME' is missing, or if ``target_col`` is missing after
            numeric coercion (a target with no numeric value at all is dropped
            together with the other all-NaN columns and reported as missing).
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("The column 'TIME' does not exist in the dataset.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    # Rows whose timestamp cannot be parsed are discarded.
    df = df.dropna(subset=['TIME']).set_index('TIME')
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.dropna(axis=1, how='all')

    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found.")
    # Assign the interpolated column back explicitly: calling
    # ``df[col].interpolate(inplace=True)`` works on a temporary object and is
    # a silent no-op when pandas Copy-on-Write is active.
    df[target_col] = df[target_col].interpolate(method='linear') - 1

    # NOTE(review): the rolling, EMA and FFT features below are computed from
    # windows that include the current row's target value, so they may leak the
    # label into the inputs -- confirm this is intended.
    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        rolling = df[target_col].rolling(window)
        df[f'{target_col}_roll_mean_{window}'] = rolling.mean()
        df[f'{target_col}_roll_std_{window}'] = rolling.std()

    iso = df.index.isocalendar()  # computed once, used for week and year
    df['month'] = df.index.month
    df['weekofyear'] = iso.week
    df['year'] = iso.year
    df['sin_week'] = np.sin(2 * np.pi * df['weekofyear'] / 52)
    df['cos_week'] = np.cos(2 * np.pi * df['weekofyear'] / 52)
    df['EMA_3'] = df[target_col].ewm(span=3, adjust=False).mean()
    df['EMA_6'] = df[target_col].ewm(span=6, adjust=False).mean()
    df['correlation_target_month'] = df[target_col].rolling(6).corr(df['month'])
    df['correlation_target_week'] = df[target_col].rolling(6).corr(df['weekofyear'])

    # FFT of the non-NaN target values; each part is padded with NaN up to the
    # number of rows so it can be stored as a column.
    spectrum = fft(df[target_col].dropna().values)
    n_rows = len(df)
    for name, part in (('fft_real', np.real(spectrum)), ('fft_imag', np.imag(spectrum))):
        part = part[:n_rows]
        df[name] = np.concatenate([part, np.full(n_rows - len(part), np.nan)])

    # Warm-up NaNs from shift/rolling (and any other gaps) get the column median.
    df = df.fillna(df.median())

    return df, target_col

def feature_scaling(df, target_col, scaler=None):
    """Split a frame into scaled features and integer labels.

    Args:
        df: Numeric DataFrame that contains ``target_col``.
        target_col: Name of the label column; every other column is a feature.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied unchanged, so held-out data can share the scale
            learned on the training data. When omitted, a new ``MinMaxScaler``
            is fitted on ``df``.

    Returns:
        Tuple ``(X_scaled, y, scaler)``: the scaled feature matrix, the labels
        cast to int, and the scaler that produced ``X_scaled``.
    """
    X = df.drop(columns=[target_col])
    # astype(int) truncates fractional labels (e.g. produced by interpolation).
    y = df[target_col].astype(int)
    if scaler is None:
        scaler = MinMaxScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        X_scaled = scaler.transform(X)
    return X_scaled, y, scaler

def objective_function(neurons, X_train, y_train, X_val, y_val):
    """Train an ELM with ``neurons`` hidden units and score it on validation data.

    Args:
        neurons: Hidden-layer size; truncated to int (the optimiser passes floats).
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_val: Validation feature matrix.
        y_val: Labels for ``X_val``.

    Returns:
        Validation accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    neurons = int(neurons)
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Size the output layer by the largest label, not by the number of distinct
    # labels: if a class is absent from this split, len(np.unique(y)) is too
    # small and np.eye(classes)[labels] raises IndexError.
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]
    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")
    elm.train(X_train, y_train_onehot)
    # Column index of the strongest output is the predicted label.
    y_pred = elm.predict(X_val).argmax(axis=1)
    return accuracy_score(y_val, y_pred)

def optimize_hyperparameters(X_train, y_train):
    """Pick the hidden-layer size by Bayesian optimisation on a hold-out split.

    20% of the supplied data is held out (fixed seed 42) and the validation
    accuracy returned by ``objective_function`` is maximised over a neuron
    count in the range 32..128, using 5 initial points and 10 iterations.

    Returns:
        The best neuron count found, truncated to int.
    """
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    def _validation_accuracy(neurons):
        # Keyword name must match the key used in ``pbounds``.
        return objective_function(neurons, fit_X, fit_y, holdout_X, holdout_y)

    search = BayesianOptimization(
        f=_validation_accuracy,
        pbounds={'neurons': (32, 128)},
        random_state=42,
        verbose=2,
    )
    search.maximize(init_points=5, n_iter=10)

    best = search.max
    print("\n Best Hyperparameters Found:")
    print(best)
    return int(best['params']['neurons'])

def build_elm_model(X_train, y_train, X_test, y_test, neurons=100):
    """Train the final ELM classifier, print its metrics and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_test: Test feature matrix.
        y_test: Labels for ``X_test``.
        neurons: Number of sigmoid hidden neurons.

    Returns:
        The trained ``ELM`` instance.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Largest label + 1 keeps the one-hot lookup valid even when the label set
    # has gaps (len(np.unique(y)) would be too small and raise IndexError).
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]

    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")

    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short intervals on platforms with a coarse wall clock.
    start_train = time.perf_counter()
    elm.train(X_train, y_train_onehot)
    training_time = time.perf_counter() - start_train

    start_test = time.perf_counter()
    y_pred = elm.predict(X_test)
    testing_time = time.perf_counter() - start_test
    n_test = len(X_test)
    inference_time = testing_time / n_test if n_test else 0.0

    y_pred_labels = y_pred.argmax(axis=1)
    acc = accuracy_score(y_test, y_pred_labels)

    # Manual model size estimate (approximate): input and output weights only,
    # hidden-layer biases are not counted.
    float_size_bytes = 8
    input_dim = X_train.shape[1]
    output_dim = classes
    total_params = neurons * (input_dim + output_dim)
    model_size = total_params * float_size_bytes / (1024 ** 2)  # in MB

    # Resident memory of the whole Python process, not of the model alone.
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size (est.): {model_size:.2f} MB")
    print(f"Trainable Params (est.): {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")
    print(f"Accuracy: {acc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred_labels, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_labels))

    return elm

def main(train_path=None, test_path=None):
    """Run the horizon-1 pipeline: load, scale, tune and evaluate the ELM.

    Args:
        train_path: Training CSV; defaults to the original horizon-1 file.
        test_path: Test CSV; defaults to the original horizon-1 file.
    """
    if train_path is None:
        train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1_train.csv"
    if test_path is None:
        test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H1_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, scaler = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    # Scale the test set with the scaler fitted on the training data. Fitting
    # a second scaler on the test set would put both sets on different scales
    # and use test statistics for preprocessing.
    # Selecting by the training column order also fails loudly (KeyError) if
    # the test file lacks a feature.
    feature_cols = df_train.columns.drop(target_col)
    X_test = scaler.transform(df_test[feature_cols])
    y_test = df_test[target_col].astype(int)

    print("\n Performing Bayesian Optimization for ELM...")
    best_neurons = optimize_hyperparameters(X_train, y_train)
    print(f"\n Optimal Number of Neurons: {best_neurons}")

    print("\n Training Final Model with Optimal Parameters...")
    build_elm_model(X_train, y_train, X_test, y_test, neurons=best_neurons)

if __name__ == "__main__":
    main()



🔍 Performing Bayesian Optimization for ELM...
|   iter    |  target   |  neurons  |
-------------------------------------
| [39m1        [39m | [39m0.9431   [39m | [39m67.96    [39m |
| [35m2        [39m | [35m0.9715   [39m | [35m123.3    [39m |
| [39m3        [39m | [39m0.9593   [39m | [39m102.3    [39m |
| [39m4        [39m | [39m0.9268   [39m | [39m89.47    [39m |
| [39m5        [39m | [39m0.9309   [39m | [39m46.98    [39m |
| [39m6        [39m | [39m0.9675   [39m | [39m123.2    [39m |
| [39m7        [39m | [39m0.9512   [39m | [39m123.3    [39m |
| [39m8        [39m | [39m0.939    [39m | [39m64.22    [39m |
| [39m9        [39m | [39m0.9553   [39m | [39m102.3    [39m |
| [39m10       [39m | [39m0.9431   [39m | [39m49.46    [39m |
| [39m11       [39m | [39m0.9593   [39m | [39m123.3    [39m |
| [39m12       [39m | [39m0.935    [39m | [39m37.5     [39m |
| [39m13       [39m | [39m0.9715   [39m | [39m123.3    

ELM for Horizon 2

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
from hpelm import ELM
import warnings

np.random.seed(42)
warnings.filterwarnings('ignore')

def load_and_preprocess_data(filepath, target_col='fenologia_h2'):
    """Load a phenology CSV and clean it for the horizon-2 model.

    Args:
        filepath: Path of a CSV file holding a 'TIME' column and the target.
        target_col: Name of the target column (defaults to the horizon-2 label).

    Returns:
        Tuple ``(df, target_col)``. ``df`` is indexed by the parsed 'TIME'
        values, every column is numeric, the target is shifted down by one,
        and remaining NaNs are replaced by the column median.

    Raises:
        KeyError: If 'TIME' is missing, or if ``target_col`` is missing after
            numeric coercion (a target with no numeric value at all is dropped
            together with the other all-NaN columns and reported as missing).
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("The column 'TIME' does not exist in the dataset.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    # Rows whose timestamp cannot be parsed are discarded.
    df = df.dropna(subset=['TIME']).set_index('TIME')
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.dropna(axis=1, how='all')

    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found.")
    # Assign the interpolated column back explicitly: calling
    # ``df[col].interpolate(inplace=True)`` works on a temporary object and is
    # a silent no-op when pandas Copy-on-Write is active.
    df[target_col] = df[target_col].interpolate(method='linear') - 1

    # Any remaining gap is filled with the median of its column.
    df = df.fillna(df.median())

    return df, target_col

def feature_scaling(df, target_col, scaler=None):
    """Split a frame into scaled features and integer labels.

    Args:
        df: Numeric DataFrame that contains ``target_col``.
        target_col: Name of the label column; every other column is a feature.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied unchanged, so held-out data can share the scale
            learned on the training data. When omitted, a new ``MinMaxScaler``
            is fitted on ``df``.

    Returns:
        Tuple ``(X_scaled, y, scaler)``: the scaled feature matrix, the labels
        cast to int, and the scaler that produced ``X_scaled``.
    """
    X = df.drop(columns=[target_col])
    # astype(int) truncates fractional labels (e.g. produced by interpolation).
    y = df[target_col].astype(int)
    if scaler is None:
        scaler = MinMaxScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        X_scaled = scaler.transform(X)
    return X_scaled, y, scaler

def objective_function(neurons, X_train, y_train, X_val, y_val):
    """Train an ELM with ``neurons`` hidden units and score it on validation data.

    Args:
        neurons: Hidden-layer size; truncated to int (the optimiser passes floats).
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_val: Validation feature matrix.
        y_val: Labels for ``X_val``.

    Returns:
        Validation accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    neurons = int(neurons)
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Size the output layer by the largest label, not by the number of distinct
    # labels: if a class is absent from this split, len(np.unique(y)) is too
    # small and np.eye(classes)[labels] raises IndexError.
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]
    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")
    elm.train(X_train, y_train_onehot)
    # Column index of the strongest output is the predicted label.
    y_pred = elm.predict(X_val).argmax(axis=1)
    return accuracy_score(y_val, y_pred)

def optimize_hyperparameters(X_train, y_train):
    """Pick the hidden-layer size by Bayesian optimisation on a hold-out split.

    20% of the supplied data is held out (fixed seed 42) and the validation
    accuracy returned by ``objective_function`` is maximised over a neuron
    count in the range 32..128, using 5 initial points and 10 iterations.

    Returns:
        The best neuron count found, truncated to int.
    """
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    def _validation_accuracy(neurons):
        # Keyword name must match the key used in ``pbounds``.
        return objective_function(neurons, fit_X, fit_y, holdout_X, holdout_y)

    search = BayesianOptimization(
        f=_validation_accuracy,
        pbounds={'neurons': (32, 128)},
        random_state=42,
        verbose=2,
    )
    search.maximize(init_points=5, n_iter=10)

    best = search.max
    print("\n Best Hyperparameters Found:")
    print(best)
    return int(best['params']['neurons'])

def build_elm_model(X_train, y_train, X_test, y_test, neurons=100):
    """Train the final ELM classifier, print its metrics and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_test: Test feature matrix.
        y_test: Labels for ``X_test``.
        neurons: Number of sigmoid hidden neurons.

    Returns:
        The trained ``ELM`` instance.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Largest label + 1 keeps the one-hot lookup valid even when the label set
    # has gaps (len(np.unique(y)) would be too small and raise IndexError).
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]

    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")

    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short intervals on platforms with a coarse wall clock.
    start_train = time.perf_counter()
    elm.train(X_train, y_train_onehot)
    training_time = time.perf_counter() - start_train

    start_test = time.perf_counter()
    y_pred = elm.predict(X_test)
    testing_time = time.perf_counter() - start_test
    n_test = len(X_test)
    inference_time = testing_time / n_test if n_test else 0.0

    y_pred_labels = y_pred.argmax(axis=1)
    acc = accuracy_score(y_test, y_pred_labels)

    # Manual model size estimate (approximate): input and output weights only,
    # hidden-layer biases are not counted.
    float_size_bytes = 8
    input_dim = X_train.shape[1]
    output_dim = classes
    total_params = neurons * (input_dim + output_dim)
    model_size = total_params * float_size_bytes / (1024 ** 2)  # in MB

    # Resident memory of the whole Python process, not of the model alone.
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size (est.): {model_size:.2f} MB")
    print(f"Trainable Params (est.): {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")
    print(f"Accuracy: {acc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred_labels, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_labels))

    return elm

def main(train_path=None, test_path=None):
    """Run the horizon-2 pipeline: load, scale, tune and evaluate the ELM.

    Args:
        train_path: Training CSV; defaults to the original Lasso-selected file.
        test_path: Test CSV; defaults to the original Lasso-selected file.
    """
    if train_path is None:
        train_path = r"E:\Abroad period research\Phenology datasets\Lasso_Selected_Features_H2\train_lasso_selected.csv"
    if test_path is None:
        test_path = r"E:\Abroad period research\Phenology datasets\Lasso_Selected_Features_H2\test_lasso_selected.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, scaler = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    # Scale the test set with the scaler fitted on the training data. Fitting
    # a second scaler on the test set would put both sets on different scales
    # and use test statistics for preprocessing.
    # Selecting by the training column order also fails loudly (KeyError) if
    # the test file lacks a feature.
    feature_cols = df_train.columns.drop(target_col)
    X_test = scaler.transform(df_test[feature_cols])
    y_test = df_test[target_col].astype(int)

    print("\n Performing Bayesian Optimization for ELM...")
    best_neurons = optimize_hyperparameters(X_train, y_train)
    print(f"\n Optimal Number of Neurons: {best_neurons}")

    print("\n Training Final Model with Optimal Parameters...")
    build_elm_model(X_train, y_train, X_test, y_test, neurons=best_neurons)

if __name__ == "__main__":
    main()


ELM for Horizon 3

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
from hpelm import ELM
import warnings

np.random.seed(42)
warnings.filterwarnings('ignore')

def load_and_preprocess_data(filepath, target_col='fenologia_h3'):
    """Load a phenology CSV and build the horizon-3 feature table.

    Args:
        filepath: Path of a CSV file holding a 'TIME' column and the target.
        target_col: Name of the target column (defaults to the horizon-3 label).

    Returns:
        Tuple ``(df, target_col)``. ``df`` is indexed by the parsed 'TIME'
        values, every column is numeric, the target is shifted down by one,
        lag/rolling features of the target are appended, and remaining NaNs
        are replaced by the column median.

    Raises:
        KeyError: If 'TIME' is missing, or if ``target_col`` is missing after
            numeric coercion (a target with no numeric value at all is dropped
            together with the other all-NaN columns and reported as missing).
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("The column 'TIME' does not exist in the dataset.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    # Rows whose timestamp cannot be parsed are discarded.
    df = df.dropna(subset=['TIME']).set_index('TIME')
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.dropna(axis=1, how='all')

    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found.")
    # Assign the interpolated column back explicitly: calling
    # ``df[col].interpolate(inplace=True)`` works on a temporary object and is
    # a silent no-op when pandas Copy-on-Write is active.
    df[target_col] = df[target_col].interpolate(method='linear') - 1

    # NOTE(review): the rolling statistics use windows that include the current
    # row's target value, so they may leak the label into the inputs --
    # confirm this is intended.
    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        rolling = df[target_col].rolling(window)
        df[f'{target_col}_roll_mean_{window}'] = rolling.mean()
        df[f'{target_col}_roll_std_{window}'] = rolling.std()

    # Warm-up NaNs from shift/rolling (and any other gaps) get the column median.
    df = df.fillna(df.median())

    return df, target_col

def feature_scaling(df, target_col, scaler=None):
    """Split a frame into scaled features and integer labels.

    Args:
        df: Numeric DataFrame that contains ``target_col``.
        target_col: Name of the label column; every other column is a feature.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied unchanged, so held-out data can share the scale
            learned on the training data. When omitted, a new ``MinMaxScaler``
            is fitted on ``df``.

    Returns:
        Tuple ``(X_scaled, y, scaler)``: the scaled feature matrix, the labels
        cast to int, and the scaler that produced ``X_scaled``.
    """
    X = df.drop(columns=[target_col])
    # astype(int) truncates fractional labels (e.g. produced by interpolation).
    y = df[target_col].astype(int)
    if scaler is None:
        scaler = MinMaxScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        X_scaled = scaler.transform(X)
    return X_scaled, y, scaler

def objective_function(neurons, X_train, y_train, X_val, y_val):
    """Train an ELM with ``neurons`` hidden units and score it on validation data.

    Args:
        neurons: Hidden-layer size; truncated to int (the optimiser passes floats).
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_val: Validation feature matrix.
        y_val: Labels for ``X_val``.

    Returns:
        Validation accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    neurons = int(neurons)
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Size the output layer by the largest label, not by the number of distinct
    # labels: if a class is absent from this split, len(np.unique(y)) is too
    # small and np.eye(classes)[labels] raises IndexError.
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]
    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")
    elm.train(X_train, y_train_onehot)
    # Column index of the strongest output is the predicted label.
    y_pred = elm.predict(X_val).argmax(axis=1)
    return accuracy_score(y_val, y_pred)

def optimize_hyperparameters(X_train, y_train):
    """Pick the hidden-layer size by Bayesian optimisation on a hold-out split.

    20% of the supplied data is held out (fixed seed 42) and the validation
    accuracy returned by ``objective_function`` is maximised over a neuron
    count in the range 32..128, using 5 initial points and 10 iterations.

    Returns:
        The best neuron count found, truncated to int.
    """
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    def _validation_accuracy(neurons):
        # Keyword name must match the key used in ``pbounds``.
        return objective_function(neurons, fit_X, fit_y, holdout_X, holdout_y)

    search = BayesianOptimization(
        f=_validation_accuracy,
        pbounds={'neurons': (32, 128)},
        random_state=42,
        verbose=2,
    )
    search.maximize(init_points=5, n_iter=10)

    best = search.max
    print("\n Best Hyperparameters Found:")
    print(best)
    return int(best['params']['neurons'])

def build_elm_model(X_train, y_train, X_test, y_test, neurons=100):
    """Train the final ELM classifier, print its metrics and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_test: Test feature matrix.
        y_test: Labels for ``X_test``.
        neurons: Number of sigmoid hidden neurons.

    Returns:
        The trained ``ELM`` instance.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Largest label + 1 keeps the one-hot lookup valid even when the label set
    # has gaps (len(np.unique(y)) would be too small and raise IndexError).
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]

    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")

    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short intervals on platforms with a coarse wall clock.
    start_train = time.perf_counter()
    elm.train(X_train, y_train_onehot)
    training_time = time.perf_counter() - start_train

    start_test = time.perf_counter()
    y_pred = elm.predict(X_test)
    testing_time = time.perf_counter() - start_test
    n_test = len(X_test)
    inference_time = testing_time / n_test if n_test else 0.0

    y_pred_labels = y_pred.argmax(axis=1)
    acc = accuracy_score(y_test, y_pred_labels)

    # Manual model size estimate (approximate): input and output weights only,
    # hidden-layer biases are not counted.
    float_size_bytes = 8
    input_dim = X_train.shape[1]
    output_dim = classes
    total_params = neurons * (input_dim + output_dim)
    model_size = total_params * float_size_bytes / (1024 ** 2)  # in MB

    # Resident memory of the whole Python process, not of the model alone.
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size (est.): {model_size:.2f} MB")
    print(f"Trainable Params (est.): {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")
    print(f"Accuracy: {acc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred_labels, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_labels))

    return elm

def main(train_path=None, test_path=None):
    """Run the horizon-3 pipeline: load, scale, tune and evaluate the ELM.

    Args:
        train_path: Training CSV; defaults to the original horizon-3 file.
        test_path: Test CSV; defaults to the original horizon-3 file.
    """
    if train_path is None:
        train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H3_train.csv"
    if test_path is None:
        test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H3_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, scaler = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    # Scale the test set with the scaler fitted on the training data. Fitting
    # a second scaler on the test set would put both sets on different scales
    # and use test statistics for preprocessing.
    # Selecting by the training column order also fails loudly (KeyError) if
    # the test file lacks a feature.
    feature_cols = df_train.columns.drop(target_col)
    X_test = scaler.transform(df_test[feature_cols])
    y_test = df_test[target_col].astype(int)

    print("\n Performing Bayesian Optimization for ELM...")
    best_neurons = optimize_hyperparameters(X_train, y_train)
    print(f"\n Optimal Number of Neurons: {best_neurons}")

    print("\n Training Final Model with Optimal Parameters...")
    build_elm_model(X_train, y_train, X_test, y_test, neurons=best_neurons)

if __name__ == "__main__":
    main()


ELM for Horizon 4

In [None]:
import pandas as pd
import numpy as np
import time
import os
import psutil
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
from scipy.fftpack import fft
from hpelm import ELM
import warnings

np.random.seed(42)
warnings.filterwarnings('ignore')

def load_and_preprocess_data(filepath, target_col='fenologia_h4'):
    """Load a phenology CSV and build the horizon-4 feature table.

    Args:
        filepath: Path of a CSV file holding a 'TIME' column and the target.
        target_col: Name of the target column (defaults to the horizon-4 label).

    Returns:
        Tuple ``(df, target_col)``. ``df`` is indexed by the parsed 'TIME'
        values, every column is numeric, the target is shifted down by one,
        lag/rolling features of the target are appended, and remaining NaNs
        are replaced by the column median.

    Raises:
        KeyError: If 'TIME' is missing, or if ``target_col`` is missing after
            numeric coercion (a target with no numeric value at all is dropped
            together with the other all-NaN columns and reported as missing).
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.replace('"', '').str.strip()
    if 'TIME' not in df.columns:
        raise KeyError("The column 'TIME' does not exist in the dataset.")
    df['TIME'] = pd.to_datetime(df['TIME'], errors='coerce')
    # Rows whose timestamp cannot be parsed are discarded.
    df = df.dropna(subset=['TIME']).set_index('TIME')
    df = df.apply(pd.to_numeric, errors='coerce')
    df = df.dropna(axis=1, how='all')

    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found.")
    # Assign the interpolated column back explicitly: calling
    # ``df[col].interpolate(inplace=True)`` works on a temporary object and is
    # a silent no-op when pandas Copy-on-Write is active.
    df[target_col] = df[target_col].interpolate(method='linear') - 1

    # NOTE(review): the rolling statistics use windows that include the current
    # row's target value, so they may leak the label into the inputs --
    # confirm this is intended.
    for i in range(1, 4):
        df[f'{target_col}_lag{i}'] = df[target_col].shift(i)
    for window in [3, 6]:
        rolling = df[target_col].rolling(window)
        df[f'{target_col}_roll_mean_{window}'] = rolling.mean()
        df[f'{target_col}_roll_std_{window}'] = rolling.std()

    # Warm-up NaNs from shift/rolling (and any other gaps) get the column median.
    df = df.fillna(df.median())

    return df, target_col

def feature_scaling(df, target_col, scaler=None):
    """Split a frame into scaled features and integer labels.

    Args:
        df: Numeric DataFrame that contains ``target_col``.
        target_col: Name of the label column; every other column is a feature.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied unchanged, so held-out data can share the scale
            learned on the training data. When omitted, a new ``MinMaxScaler``
            is fitted on ``df``.

    Returns:
        Tuple ``(X_scaled, y, scaler)``: the scaled feature matrix, the labels
        cast to int, and the scaler that produced ``X_scaled``.
    """
    X = df.drop(columns=[target_col])
    # astype(int) truncates fractional labels (e.g. produced by interpolation).
    y = df[target_col].astype(int)
    if scaler is None:
        scaler = MinMaxScaler()
        X_scaled = scaler.fit_transform(X)
    else:
        X_scaled = scaler.transform(X)
    return X_scaled, y, scaler

def objective_function(neurons, X_train, y_train, X_val, y_val):
    """Train an ELM with ``neurons`` hidden units and score it on validation data.

    Args:
        neurons: Hidden-layer size; truncated to int (the optimiser passes floats).
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_val: Validation feature matrix.
        y_val: Labels for ``X_val``.

    Returns:
        Validation accuracy as computed by ``accuracy_score``.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    neurons = int(neurons)
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Size the output layer by the largest label, not by the number of distinct
    # labels: if a class is absent from this split, len(np.unique(y)) is too
    # small and np.eye(classes)[labels] raises IndexError.
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]
    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")
    elm.train(X_train, y_train_onehot)
    # Column index of the strongest output is the predicted label.
    y_pred = elm.predict(X_val).argmax(axis=1)
    return accuracy_score(y_val, y_pred)

def optimize_hyperparameters(X_train, y_train):
    """Pick the hidden-layer size by Bayesian optimisation on a hold-out split.

    20% of the supplied data is held out (fixed seed 42) and the validation
    accuracy returned by ``objective_function`` is maximised over a neuron
    count in the range 32..128, using 5 initial points and 10 iterations.

    Returns:
        The best neuron count found, truncated to int.
    """
    fit_X, holdout_X, fit_y, holdout_y = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )

    def _validation_accuracy(neurons):
        # Keyword name must match the key used in ``pbounds``.
        return objective_function(neurons, fit_X, fit_y, holdout_X, holdout_y)

    search = BayesianOptimization(
        f=_validation_accuracy,
        pbounds={'neurons': (32, 128)},
        random_state=42,
        verbose=2,
    )
    search.maximize(init_points=5, n_iter=10)

    best = search.max
    print("\n Best Hyperparameters Found:")
    print(best)
    return int(best['params']['neurons'])

def build_elm_model(X_train, y_train, X_test, y_test, neurons=100):
    """Train the final ELM classifier, print its metrics and return it.

    Args:
        X_train: Training feature matrix.
        y_train: Integer class labels (>= 0) for ``X_train``.
        X_test: Test feature matrix.
        y_test: Labels for ``X_test``.
        neurons: Number of sigmoid hidden neurons.

    Returns:
        The trained ``ELM`` instance.

    Raises:
        ValueError: If ``y_train`` contains a negative label.
    """
    labels = np.asarray(y_train, dtype=int)
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around in the one-hot lookup.
        raise ValueError("Class labels must be non-negative integers.")
    # Largest label + 1 keeps the one-hot lookup valid even when the label set
    # has gaps (len(np.unique(y)) would be too small and raise IndexError).
    classes = int(labels.max()) + 1
    y_train_onehot = np.eye(classes)[labels]

    elm = ELM(X_train.shape[1], classes, classification="c", norm=1)
    elm.add_neurons(neurons, "sigm")

    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short intervals on platforms with a coarse wall clock.
    start_train = time.perf_counter()
    elm.train(X_train, y_train_onehot)
    training_time = time.perf_counter() - start_train

    start_test = time.perf_counter()
    y_pred = elm.predict(X_test)
    testing_time = time.perf_counter() - start_test
    n_test = len(X_test)
    inference_time = testing_time / n_test if n_test else 0.0

    y_pred_labels = y_pred.argmax(axis=1)
    acc = accuracy_score(y_test, y_pred_labels)

    # Manual model size estimate (approximate): input and output weights only,
    # hidden-layer biases are not counted.
    float_size_bytes = 8
    input_dim = X_train.shape[1]
    output_dim = classes
    total_params = neurons * (input_dim + output_dim)
    model_size = total_params * float_size_bytes / (1024 ** 2)  # in MB

    # Resident memory of the whole Python process, not of the model alone.
    ram_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)

    print("\n===== Model Metrics =====")
    print(f"Training Time: {training_time:.2f}s")
    print(f"Inference/sample: {inference_time:.6f}s")
    print(f"Testing Time: {testing_time:.2f}s")
    print(f"Model Size (est.): {model_size:.2f} MB")
    print(f"Trainable Params (est.): {total_params:,}")
    print(f"RAM Usage: {ram_usage:.2f} MB")
    print(f"Accuracy: {acc:.4f}")
    print("Classification Report:\n", classification_report(y_test, y_pred_labels, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_labels))

    return elm

def main(train_path=None, test_path=None):
    """Run the horizon-4 pipeline: load, scale, tune and evaluate the ELM.

    Args:
        train_path: Training CSV; defaults to the original horizon-4 file.
        test_path: Test CSV; defaults to the original horizon-4 file.
    """
    if train_path is None:
        train_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H4_train.csv"
    if test_path is None:
        test_path = r"E:\Abroad period research\Phenology datasets\PHENOLOGY_H4_test.csv"

    df_train, target_col = load_and_preprocess_data(train_path)
    X_train, y_train, scaler = feature_scaling(df_train, target_col)

    df_test, _ = load_and_preprocess_data(test_path)
    # Scale the test set with the scaler fitted on the training data. Fitting
    # a second scaler on the test set would put both sets on different scales
    # and use test statistics for preprocessing.
    # Selecting by the training column order also fails loudly (KeyError) if
    # the test file lacks a feature.
    feature_cols = df_train.columns.drop(target_col)
    X_test = scaler.transform(df_test[feature_cols])
    y_test = df_test[target_col].astype(int)

    print("\n Performing Bayesian Optimization for ELM...")
    best_neurons = optimize_hyperparameters(X_train, y_train)
    print(f"\n Optimal Number of Neurons: {best_neurons}")

    print("\n Training Final Model with Optimal Parameters...")
    build_elm_model(X_train, y_train, X_test, y_test, neurons=best_neurons)

if __name__ == "__main__":
    main()
