In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

class RVFLClassifier:
    """Random-weight single-hidden-layer classifier with a ridge readout.

    The hidden layer uses randomly drawn, untrained weights and a sigmoid
    activation. Only the output weights are learned, in closed form, by
    ridge regression on the hidden activations plus a constant bias column.
    No direct input-to-output links are used: the readout sees only the
    hidden activations and the bias column.

    Args:
        regularization_param: Ridge penalty added to the diagonal of the
            Gram matrix before solving for the output weights.
        initial_hidden_nodes: Number of random hidden units.

    Attributes set by ``fit``:
        input_weights: Random matrix of shape (n_features, hidden_nodes).
            It is drawn but not used in the forward pass; it is kept so the
            attribute and the order of RNG draws stay unchanged.
        bias: Random hidden bias of shape (1, hidden_nodes).
        random_hidden_weights: Random input-to-hidden weights of shape
            (n_features, hidden_nodes); these are the weights actually used.
        output_weights: Learned readout of shape (hidden_nodes + 1, n_classes).
    """

    def __init__(self, regularization_param, initial_hidden_nodes=10):
        self.regularization_param = regularization_param
        self.hidden_nodes = initial_hidden_nodes
        self.input_weights = None
        self.bias = None
        self.random_hidden_weights = None
        self.output_weights = None

    def _sigmoid(self, x):
        """Return the logistic function of ``x`` element-wise."""
        # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))); logaddexp evaluates the
        # inner log without overflowing for large negative x.
        return np.exp(-np.logaddexp(0, -x))

    def _design_matrix(self, X):
        """Return hidden activations for ``X`` with a trailing column of ones."""
        hidden_output = self._sigmoid(X @ self.random_hidden_weights + self.bias)
        return np.hstack((hidden_output, np.ones((X.shape[0], 1))))

    def fit(self, X, y):
        """Draw fresh random hidden parameters and solve for the readout.

        Every call starts from scratch: new random weights are drawn from
        NumPy's global RNG and the output weights are recomputed, so a refit
        does not depend on any previous fit.

        Args:
            X: Array of shape (n_samples, n_features).
            y: One-hot target array of shape (n_samples, n_classes).
        """
        n_features = X.shape[1]

        # The order of these three draws is kept so that results under a
        # given global seed are unchanged.
        self.input_weights = np.random.randn(n_features, self.hidden_nodes)
        self.bias = np.random.randn(1, self.hidden_nodes)
        self.random_hidden_weights = np.random.randn(n_features, self.hidden_nodes)

        H = self._design_matrix(X)

        # Ridge solution (H^T H + lambda I) W = H^T y. Solving the linear
        # system is more accurate than forming the explicit inverse.
        gram = H.T @ H + self.regularization_param * np.eye(H.shape[1])
        self.output_weights = np.linalg.solve(gram, H.T @ y)

    def predict(self, X):
        """Return the predicted class index for each row of ``X``.

        Args:
            X: Array of shape (n_samples, n_features).

        Returns:
            Integer array of shape (n_samples,) holding the argmax over the
            readout scores.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.output_weights is None:
            raise RuntimeError("RVFLClassifier.predict called before fit")
        predicted_output = self._design_matrix(X) @ self.output_weights
        return np.argmax(predicted_output, axis=1)

def k_fold_cross_validation(X_train, y_train, n_folds=5, model_class=RVFLClassifier, **model_params):
    """Estimate classification accuracy with contiguous k-fold cross-validation.

    The samples are split, in their given order and without shuffling, into
    ``n_folds`` contiguous folds of ``len(X_train) // n_folds`` samples each;
    the last fold also takes any remainder. For each fold a fresh model is
    trained on the other folds and scored on the held-out fold.

    Args:
        X_train: Feature array indexable by an integer index array.
        y_train: One-hot target array with the same length as ``X_train``.
        n_folds: Number of folds; must satisfy 2 <= n_folds <= len(X_train).
        model_class: Callable returning an object with ``fit(X, y)`` and
            ``predict(X)``; ``predict`` must return class indices.
        **model_params: Keyword arguments forwarded to ``model_class``.

    Returns:
        The mean accuracy over all folds.

    Raises:
        ValueError: If ``X_train`` and ``y_train`` differ in length, or if
            ``n_folds`` is outside the valid range.
    """
    n_samples = len(X_train)
    if len(y_train) != n_samples:
        raise ValueError(
            f"X_train and y_train must have the same length, "
            f"got {n_samples} and {len(y_train)}"
        )
    # Fewer than 2 folds leaves no training data; more folds than samples
    # produces empty test folds and a NaN mean.
    if not 2 <= n_folds <= n_samples:
        raise ValueError(
            f"n_folds must be between 2 and the number of samples "
            f"({n_samples}), got {n_folds}"
        )

    fold_size = n_samples // n_folds
    all_indices = np.arange(n_samples)
    accuracy_scores = []

    for i in range(n_folds):
        start_idx = i * fold_size
        # The last fold absorbs the remainder when n_samples is not divisible.
        end_idx = start_idx + fold_size if i < n_folds - 1 else n_samples
        test_indices = all_indices[start_idx:end_idx]
        train_indices = np.concatenate((all_indices[:start_idx], all_indices[end_idx:]))

        X_fold_train, X_fold_test = X_train[train_indices], X_train[test_indices]
        y_fold_train, y_fold_test = y_train[train_indices], y_train[test_indices]

        # A new model per fold so that no state leaks between folds.
        model = model_class(**model_params)
        model.fit(X_fold_train, y_fold_train)
        y_pred = model.predict(X_fold_test)

        # Targets are one-hot, so argmax recovers the class index.
        accuracy = np.mean(y_pred == np.argmax(y_fold_test, axis=1))
        accuracy_scores.append(accuracy)

    return np.mean(accuracy_scores)

# Load Iris dataset
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X, y = iris.data, iris.target

# Standardize the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert target labels to one-hot encoded format
y_one_hot = np.eye(np.unique(y).shape[0])[y]

# Hold out 20% of the data as a final test set; the seed fixes this split only.
# NOTE: RVFLClassifier.fit draws its hidden weights from NumPy's global RNG,
# which is not seeded here, so the printed accuracies vary from run to run.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_one_hot, test_size=0.2, random_state=seed)

# Model hyper-parameters shared by the search and the final fit.
model_params = {'regularization_param': 0.1}

# Search over the number of hidden nodes with k-fold cross-validation on the
# training split, keeping the first size that reaches the best mean accuracy.
n_splits = 5
initial_hidden_nodes = 10
hidden_nodes_increment = 5
max_hidden_nodes = 100
best_accuracy = -1
best_num_nodes = None
for n_hidden in range(initial_hidden_nodes, max_hidden_nodes + 1, hidden_nodes_increment):
    average_accuracy = k_fold_cross_validation(
        X_train,
        y_train,
        n_folds=n_splits,
        model_class=RVFLClassifier,
        initial_hidden_nodes=n_hidden,
        **model_params,
    )
    if average_accuracy > best_accuracy:
        best_accuracy = average_accuracy
        best_num_nodes = n_hidden
    print(f"Average Accuracy for {n_hidden} hidden nodes: {average_accuracy:.2f}")

print(f"Best number of hidden nodes: {best_num_nodes} with accuracy: {best_accuracy:.2f}")

# Retrain on the full training split with the best number of hidden nodes and
# evaluate on the held-out 20% test split.
best_model = RVFLClassifier(initial_hidden_nodes=best_num_nodes, **model_params)
best_model.fit(X_train, y_train)
y_pred_test = best_model.predict(X_test)
accuracy_test = np.mean(y_pred_test == np.argmax(y_test, axis=1))
print(f"Accuracy on test set with best model: {accuracy_test:.2f}")


Average Accuracy for 10 hidden nodes: 0.90
Average Accuracy for 15 hidden nodes: 0.94
Average Accuracy for 20 hidden nodes: 0.97
Average Accuracy for 25 hidden nodes: 0.95
Average Accuracy for 30 hidden nodes: 0.96
Average Accuracy for 35 hidden nodes: 0.95
Average Accuracy for 40 hidden nodes: 0.95
Average Accuracy for 45 hidden nodes: 0.93
Average Accuracy for 50 hidden nodes: 0.92
Average Accuracy for 55 hidden nodes: 0.93
Average Accuracy for 60 hidden nodes: 0.93
Average Accuracy for 65 hidden nodes: 0.94
Average Accuracy for 70 hidden nodes: 0.93
Average Accuracy for 75 hidden nodes: 0.93
Average Accuracy for 80 hidden nodes: 0.93
Average Accuracy for 85 hidden nodes: 0.93
Average Accuracy for 90 hidden nodes: 0.95
Average Accuracy for 95 hidden nodes: 0.94
Average Accuracy for 100 hidden nodes: 0.94
Best number of hidden nodes: 20 with accuracy: 0.97
Accuracy on test set with best model: 1.00
