# Extreme Learning Machine (ELM)

## Importing the libraries

In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

## Importing the dataset

In [54]:
# Read the Iris table from disk.
df = pd.read_csv("./Dataset/IRIS.csv")

# Positional split: every column except the last is a feature,
# the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

## Splitting the dataset into the Training set and Test set

In [55]:
# Hold out 20% of the rows; stratifying on y keeps the class proportions
# the same in both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

## Feature Scaling

In [56]:
# Learn the per-feature mean/std on the training rows only, then apply that
# same transform to both partitions so no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Training the ELM model on the Training set

In [57]:
class ELMClassifier:
    """Extreme Learning Machine classifier with a fixed random hidden layer.

    The hidden weights ``W`` and biases ``b`` are drawn once from a standard
    normal distribution and are never trained. Only the output weights
    ``beta`` are fitted, in closed form, as ``pinv(H) @ Y`` where ``H`` holds
    the hidden activations and ``Y`` the one-hot encoded labels.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    hidden_dim : int, default 500
        Number of hidden units.
    activation : str, default 'relu'
        ``'sigmoid'`` or ``'relu'``; any other value leaves the
        pre-activations unchanged (identity).
    random_state : int or None, default None
        Seed for the hidden layer. ``None`` draws from NumPy's global random
        state, exactly as before this parameter existed, so
        ``np.random.seed(s)`` followed by ``ELMClassifier(...)`` yields the
        same weights as ``ELMClassifier(..., random_state=s)``.
    """

    def __init__(self, input_dim, hidden_dim=500, activation='relu', random_state=None):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.random_state = random_state
        # `np.random` (module) and `RandomState` both expose `randn`, so the
        # unseeded path keeps using the global generator.
        rng = np.random if random_state is None else np.random.RandomState(random_state)
        self.W = rng.randn(input_dim, hidden_dim)
        self.b = rng.randn(hidden_dim)

    def _activation(self, X):
        """Apply the configured element-wise non-linearity to ``X``."""
        if self.activation == 'sigmoid':
            # exp(-|x|) never overflows; pick the algebraically equivalent
            # form for each sign instead of evaluating exp(-x) directly.
            X = np.asarray(X)
            decay = np.exp(-np.abs(X))
            return np.where(X >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        elif self.activation == 'relu':
            return np.maximum(0, X)
        else:
            return X

    def _hidden(self, X):
        """Return the hidden-layer activations for the rows of ``X``."""
        return self._activation(np.dot(X, self.W) + self.b)

    def fit(self, X, y):
        """Fit the output weights on ``X`` (n_samples, input_dim) and labels ``y``.

        ``y`` may be any array-like of non-negative integer class indices;
        the number of classes is taken as ``max(y) + 1``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` is empty, is not of integer dtype, or contains negative
            labels (which would silently wrap around when one-hot encoding).
        """
        labels = np.asarray(y).ravel()
        if labels.size == 0:
            raise ValueError("y must contain at least one label")
        if not np.issubdtype(labels.dtype, np.integer):
            raise ValueError("y must contain integer class labels (0 .. n_classes-1)")
        if labels.min() < 0:
            raise ValueError("class labels must be non-negative")

        y_onehot = np.zeros((labels.size, labels.max() + 1))
        y_onehot[np.arange(labels.size), labels] = 1
        H = self._hidden(X)
        self.beta = np.linalg.pinv(H) @ y_onehot
        return self

    def predict(self, X):
        """Return the predicted class index for each row of ``X``.

        ``fit`` must have been called first, since it creates ``self.beta``.
        """
        y_pred_prob = self._hidden(X) @ self.beta
        return np.argmax(y_pred_prob, axis=1)


# The hidden layer is drawn from NumPy's global random generator, so seed it
# here; otherwise every Restart & Run All produces different metrics.
np.random.seed(42)
elm_clf = ELMClassifier(input_dim=X_train_scaled.shape[1], hidden_dim=500, activation='relu')
elm_clf.fit(X_train_scaled, y_train)

## Predicting the Test set results

In [58]:
# Predict class indices for the held-out (scaled) test rows.
y_pred = elm_clf.predict(X_test_scaled)

# The emoji was stored as mis-decoded UTF-8 bytes; restore the intended glyph.
print("\n📊 ELM Classification Report:")
print(classification_report(y_test, y_pred))



📊 ELM Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.91      1.00      0.95        10
           2       1.00      0.90      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



## Making the Confusion Matrix

In [59]:
# Rows of the matrix are true classes, columns are predicted classes.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test, y_pred),
)
# Overall accuracy, shown as a percentage with two decimals.
print(
    f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%"
)

Confusion Matrix:
 [[10  0  0]
 [ 0 10  0]
 [ 0  1  9]]
Accuracy: 96.67%


# ADMM Top-3 Features (ELM)

## ADMM Feature Selection

In [60]:
def select_top_k_via_admm(X, y, lam=0.01, rho=1.0, iters=200, k=5):
    """Rank features by lasso coefficient magnitude, solved with ADMM.

    The columns of ``X`` are standardised, then the problem
    ``0.5 * ||Xn w - y||^2 + lam * ||z||_1  subject to  w = z``
    is iterated for a fixed number of ADMM steps. ``y`` is used as the
    regression target exactly as given.

    Parameters
    ----------
    X : pandas.DataFrame or array-like, shape (n_samples, n_features)
    y : array-like, shape (n_samples,)
    lam : float
        L1 penalty weight.
    rho : float
        ADMM augmented-Lagrangian parameter.
    iters : int
        Number of ADMM iterations (no convergence check is performed).
    k : int
        Number of features to return; if ``k`` exceeds the feature count,
        all features are returned.

    Returns
    -------
    list
        The ``k`` features with the largest ``|z|``, strongest first:
        column labels when ``X`` has ``.columns``, otherwise integer column
        indices.
    """
    # Plain sequences / arrays have no `.columns`; coerce them so the
    # arithmetic below works and fall back to positional indices on return.
    has_labels = hasattr(X, "columns")
    if not has_labels:
        X = np.asarray(X, dtype=float)

    # The epsilon keeps constant columns from dividing by zero.
    Xn = (X - X.mean(axis=0)) / (X.std(axis=0) + 1e-12)
    Xn = Xn.values if hasattr(Xn, "values") else np.asarray(Xn)
    yv = y.values if hasattr(y, "values") else np.asarray(y)

    n, p = Xn.shape
    w = np.zeros(p)
    z = np.zeros(p)
    u = np.zeros(p)

    Xty = Xn.T @ yv
    # The system matrix of the w-update does not change between iterations.
    A = Xn.T @ Xn + rho * np.eye(p)

    def soft_threshold(v, t):
        """Proximal operator of ``t * ||.||_1`` (element-wise shrinkage)."""
        return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)

    for _ in range(iters):
        w = np.linalg.solve(A, Xty + rho * (z - u))  # ridge-like w-update
        z = soft_threshold(w + u, lam / rho)         # sparse z-update
        u = u + (w - z)                              # scaled dual update

    idx = np.argsort(-np.abs(z))[:k]
    if has_labels:
        return list(X.columns[idx])
    return [int(i) for i in idx]

# Number of features to keep. The printed label is derived from this value so
# it can no longer disagree with the `k` actually passed to the selector.
ADMM_TOP_K = 3

# `top5_cols` keeps its historical name; it holds ADMM_TOP_K column names.
top5_cols = select_top_k_via_admm(X_train, y_train, lam=0.01, rho=1.0, iters=300, k=ADMM_TOP_K)
print(f"Selected (Top-{ADMM_TOP_K} ADMM):", top5_cols)

# Restrict the feature frame to the selected columns for the cells that follow.
X = X[top5_cols]

Selected (Top-5 ADMM): ['petal_width', 'petal_length', 'sepal_length']


## Splitting the dataset into the Training set and Test set

In [61]:
# At this point X holds only the ADMM-selected feature columns
# (it was narrowed by `X = X[top5_cols]`); y is the unchanged target column.
X_all, y_cls_all = X, y

# Same 80/20 stratified split and seed as the full-feature experiment.
X_train, X_test, y_train_cls, y_test_cls = train_test_split(
    X_all,
    y_cls_all,
    test_size=0.2,
    random_state=42,
    stratify=y_cls_all,
)


## Feature Scaling

In [62]:
# Fit the scaler on the training partition only, then reuse its statistics
# for the test partition.
clf_scaler = StandardScaler().fit(X_train)
X_train_clf = clf_scaler.transform(X_train)
X_test_clf = clf_scaler.transform(X_test)

## Training the ELM model on the Training set

In [63]:
# The hidden layer is drawn from NumPy's global random generator, so seed it
# here; otherwise the metrics below change on every run.
np.random.seed(42)
elm = ELMClassifier(input_dim=X_train_clf.shape[1], hidden_dim=500, activation='relu')
elm.fit(X_train_clf, y_train_cls)

# Predict class indices for the held-out rows of the reduced feature set.
y_pred = elm.predict(X_test_clf)

## Predicting the Test set results

In [64]:
# The emoji was stored as mis-decoded UTF-8 bytes; restore the intended glyph.
print("\n📊 ELM Classification Report:")
print(classification_report(y_test_cls, y_pred))



📊 ELM Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



## Making the Confusion Matrix

In [65]:
# Rows of the matrix are true classes, columns are predicted classes.
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test_cls, y_pred),
)
# Overall accuracy, shown as a percentage with two decimals.
print(
    f"Accuracy: {accuracy_score(y_test_cls, y_pred) * 100:.2f}%"
)

Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]
Accuracy: 93.33%


## Online Learning in ELM

In [66]:
from time import perf_counter
# Tunable settings for the online-ELM experiment.
# NOTE(review): the training cell visible after this one hard-codes its own
# values (e.g. hidden_dim=100) and re-assigns PRINT_EVERY, so confirm which
# of these constants are meant to drive the run.
RANDOM_STATE = 42         # seed used to build `rng` below
TOP_K = 5                 # number of features to keep via ADMM (Lasso)
HIDDEN_DIM = 300          # ELM hidden neurons
ACTIVATION = 'relu'       # 'relu' or 'sigmoid'
RIDGE = 1e-2              # RLS regularization (lambda)
PRINT_EVERY = 200         # progress print frequency during online updates

# Seeded NumPy Generator; its state advances each time it is drawn from.
rng = np.random.default_rng(RANDOM_STATE)

In [72]:
class OnlineELMClassifier:
    """Extreme Learning Machine trained incrementally by recursive least squares.

    The hidden layer (``W``, ``b``) is random and fixed. The output weights
    ``beta`` start at zero and are refined one sample at a time with the RLS
    recursion, starting from ``P = I / ridge``.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    n_classes : int
        Number of classes; labels must lie in ``[0, n_classes - 1]``.
    hidden_dim : int, default 300
        Number of hidden units.
    activation : str, default 'relu'
        ``'relu'`` or ``'sigmoid'``; any other value is treated as linear.
    ridge : float, default 1e-2
        Positive regularisation strength used to initialise ``P``.
    rng : numpy.random.Generator or None
        Source of randomness for the hidden layer; ``None`` creates an
        unseeded generator.

    Raises
    ------
    ValueError
        If ``ridge`` is not strictly positive.
    """

    def __init__(self, input_dim, n_classes, hidden_dim=300, activation='relu', ridge=1e-2, rng=None):
        if ridge <= 0:
            raise ValueError("ridge must be > 0; it initialises P = I / ridge")

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.n_classes = n_classes
        self.activation = activation
        self.ridge = ridge
        self.rng = np.random.default_rng() if rng is None else rng

        # Random hidden layer weights; the standard deviation depends on the
        # activation (sqrt(2 / input_dim) for relu, 1 / sqrt(input_dim) otherwise).
        if activation == 'relu':
            self.W = self.rng.normal(0, np.sqrt(2.0 / input_dim), size=(input_dim, hidden_dim))
        else:
            self.W = self.rng.normal(0, 1.0 / np.sqrt(input_dim), size=(input_dim, hidden_dim))
        self.b = self.rng.normal(0, 1e-3, size=(hidden_dim,))

        # Output weights beta
        self.beta = np.zeros((hidden_dim, n_classes))

        # Recursive Least Squares inverse correlation matrix
        self.P = (1.0 / self.ridge) * np.eye(hidden_dim)

    def _act(self, Z):
        """Apply the configured element-wise activation to ``Z``."""
        if self.activation == 'relu':
            return np.maximum(0.0, Z)
        elif self.activation == 'sigmoid':
            # exp(-|z|) never overflows; choose the equivalent form per sign.
            Z = np.asarray(Z)
            decay = np.exp(-np.abs(Z))
            return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        else:
            return Z  # linear

    def _hidden(self, x_row):
        """Return hidden activations for one row or a batch of rows."""
        return self._act(x_row @ self.W + self.b)

    def partial_fit(self, x_row, y_label):
        """
        Update beta and P with one sample, or sequentially with a batch.

        x_row is a single sample (shape ``(input_dim,)`` or ``(1, input_dim)``)
        or a batch ``(n, input_dim)``; y_label is the matching integer label
        or a length-``n`` sequence of labels, each in [0, n_classes-1].
        A batch is processed row by row in order, so it is equivalent to
        calling ``partial_fit`` once per sample.

        Returns ``self``. Raises ``ValueError`` when the number of labels
        does not match the number of rows, or when a label is non-integer
        or outside [0, n_classes-1]. All labels are validated before any
        update is applied.
        """
        samples = np.atleast_2d(np.asarray(x_row))
        labels = np.asarray(y_label).reshape(-1)

        if samples.shape[0] != labels.shape[0]:
            raise ValueError("x_row and y_label must describe the same number of samples")
        if not np.issubdtype(labels.dtype, np.integer):
            raise ValueError("y_label must be an integer class index")
        # A negative index would silently one-hot the wrong class.
        if labels.size and (labels.min() < 0 or labels.max() >= self.n_classes):
            raise ValueError("y_label must lie in [0, n_classes - 1]")

        for i in range(samples.shape[0]):
            # Keep the sample 2-D so the hidden projection matches predict().
            h = self._hidden(samples[i:i + 1]).reshape(-1, 1)

            # One-hot encode target
            target = np.zeros((self.n_classes, 1))
            target[labels[i], 0] = 1.0

            # Recursive Least Squares (RLS) update
            Ph = self.P @ h
            denom = 1.0 + (h.T @ Ph)[0, 0]
            gain = Ph / denom
            err = target - (h.T @ self.beta).T

            self.beta += gain @ err.T
            self.P -= gain @ (h.T @ self.P)

        return self

    def predict(self, X):
        """Return the predicted class index for each row of ``X``."""
        scores = self._hidden(X) @ self.beta
        return np.argmax(scores, axis=1)


# Reload the raw table so this experiment starts from all feature columns
# (X was narrowed to the ADMM-selected columns earlier in the notebook).
df = pd.read_csv("./Dataset/IRIS.csv")

X = df.drop(columns=["species"]).values
y = df["species"].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

n_classes = len(np.unique(y_train))
clf = OnlineELMClassifier(
    input_dim=X_train.shape[1],
    n_classes=n_classes,
    hidden_dim=100,
    activation='relu',
    ridge=1e-2,
    # A fresh seeded generator makes the random hidden layer, and therefore
    # the reported metrics, identical on every run of this cell.
    rng=np.random.default_rng(RANDOM_STATE),
)

# Only 120 training rows, so report progress more often than the default.
PRINT_EVERY = 20
t0 = perf_counter()

# Feed the training rows to the model one at a time, in order.
for i in range(X_train.shape[0]):
    x_row = X_train[i].reshape(1, -1)
    y_row = int(y_train[i])
    clf.partial_fit(x_row, y_row)

    if (i + 1) % PRINT_EVERY == 0:
        print(f"Online updates processed: {i+1}/{X_train.shape[0]}")

train_time = perf_counter() - t0
print(f"\nOnline training time: {train_time:.3f} s")

Online updates processed: 20/120
Online updates processed: 40/120
Online updates processed: 60/120
Online updates processed: 80/120
Online updates processed: 100/120
Online updates processed: 120/120

Online training time: 0.006 s


## Predicting Test Results

In [73]:
# Predict class indices for the scaled test rows with the online-trained model.
y_pred = clf.predict(X_test)

## Analysis

In [74]:
# The emoji was stored as mis-decoded UTF-8 bytes; restore the intended glyph.
print("\n📊 Online ELM Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# Report accuracy as a two-decimal percentage, matching the earlier ELM cells.
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")


📊 Online ELM Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.86      0.60      0.71        10
           2       0.69      0.90      0.78        10

    accuracy                           0.83        30
   macro avg       0.85      0.83      0.83        30
weighted avg       0.85      0.83      0.83        30

Confusion Matrix:
 [[10  0  0]
 [ 0  6  4]
 [ 0  1  9]]
Accuracy: 0.8333333333333334
