In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances
from tensorflow.keras.utils import to_categorical

In [None]:
# Load the KMNIST dataset
(ds_train, ds_test), ds_info = tfds.load(
    'kmnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

# Convert the dataset to NumPy arrays
def convert_to_numpy(dataset):
    images = []
    labels = []
    for image, label in dataset:
        images.append(image.numpy())
        labels.append(label.numpy())
    return np.array(images), np.array(labels)

X_train, y_train = convert_to_numpy(ds_train)
X_test, y_test = convert_to_numpy(ds_test)

# Preprocess the data: Normalize pixel values between 0 and 1
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Flatten the images for RBF network input (28x28 -> 784)
X_train_flat = X_train.reshape(-1, 28*28)  # (60000, 784)
X_test_flat = X_test.reshape(-1, 28*28)    # (10000, 784)

# One-hot encode the labels for classification (10 classes)
y_train_oh = to_categorical(y_train, num_classes=10)
y_test_oh = to_categorical(y_test, num_classes=10)

# Check the shapes
print("X_train_flat shape:", X_train_flat.shape)
print("y_train_oh shape:", y_train_oh.shape)
print("X_test_flat shape:", X_test_flat.shape)
print("y_test_oh shape:", y_test_oh.shape)

X_train_flat shape: (60000, 784)
y_train_oh shape: (60000, 10)
X_test_flat shape: (10000, 784)
y_test_oh shape: (10000, 10)


In [None]:
# Define the RBF layer with Gaussian function
class RBFLayer:
    def __init__(self, num_rbf_units, sigma):
        self.num_rbf_units = num_rbf_units
        self.sigma = sigma

    def rbf_function(self, X, centers):
        dist = euclidean_distances(X, centers)
        return np.exp(-dist**2 / (2 * self.sigma**2))

# Define the architecture
class RBFNetwork:
    def __init__(self, num_rbf_units, input_size, num_classes, sigma):
        self.rbf_layer = RBFLayer(num_rbf_units, sigma)
        self.num_rbf_units = num_rbf_units
        self.input_size = input_size
        self.num_classes = num_classes
        # Xavier/Glorot initialization for better stability
        self.weights = np.random.randn(self.num_rbf_units, self.num_classes) * np.sqrt(2. / (self.num_rbf_units + self.num_classes))
        self.bias = np.zeros(self.num_classes)

    def predict(self, X, centers):
        # Compute the hidden layer output (RBF)
        G = self.rbf_layer.rbf_function(X, centers)
        # Compute the output layer with softmax activation
        logits = G.dot(self.weights) + self.bias
        return self.softmax(logits)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [None]:
class RBFTrainer:
    def __init__(self, rbf_network, learning_rate=0.01, epochs=100, batch_size=64, reg_lambda=0.001):
        self.rbf_network = rbf_network
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.reg_lambda = reg_lambda  # Regularization factor

    def cross_entropy_loss(self, y_true, y_pred):
        return -np.mean(np.sum(y_true * np.log(y_pred + 1e-8), axis=1))

    def fit(self, X_train, y_train, centers):
        num_samples = X_train.shape[0]

        for epoch in range(self.epochs):
            indices = np.arange(num_samples)
            np.random.shuffle(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

            for i in range(0, num_samples, self.batch_size):
                X_batch = X_train[i:i+self.batch_size]
                y_batch = y_train[i:i+self.batch_size]

                predictions = self.rbf_network.predict(X_batch, centers)

                loss = self.cross_entropy_loss(y_batch, predictions)

                reg_loss = self.reg_lambda * np.sum(np.square(self.rbf_network.weights))
                total_loss = loss + reg_loss

                error = predictions - y_batch

                G = self.rbf_network.rbf_layer.rbf_function(X_batch, centers)
                grad_weights = G.T.dot(error) + self.reg_lambda * self.rbf_network.weights
                self.rbf_network.weights -= self.learning_rate * grad_weights
                self.rbf_network.bias -= self.learning_rate * np.sum(error, axis=0)

            if (epoch + 1) % 10 == 0:
                print(f'Epoch {epoch+1}/{self.epochs}, Loss: {total_loss:.4f}')

# Define the RBF network parameters
num_rbf_units = 200  # Increased RBF units
input_size = 784     # Input size (28x28 flattened)
num_classes = 10     # Number of output classes
sigma = 2.0          # Adjusted sigma

# Instantiate the RBF network
rbf_net = RBFNetwork(num_rbf_units=num_rbf_units, input_size=input_size, num_classes=num_classes, sigma=sigma)

# Use K-means to determine the centers of RBF units
kmeans = KMeans(n_clusters=num_rbf_units, random_state=42).fit(X_train_flat)
centers = kmeans.cluster_centers_

# Instantiate and train the RBF trainer using mini-batch gradient descent
trainer = RBFTrainer(rbf_net, learning_rate=0.01, epochs=100, batch_size=64, reg_lambda=0.001)
trainer.fit(X_train_flat, y_train_oh, centers)

# Evaluate on the test set
y_pred = rbf_net.predict(X_test_flat, centers)
accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y_test_oh, axis=1))
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Epoch 10/100, Loss: 2.4702
Epoch 20/100, Loss: 2.9727
Epoch 30/100, Loss: 3.5500
Epoch 40/100, Loss: 4.1034
Epoch 50/100, Loss: 4.7505
Epoch 60/100, Loss: 5.3412
Epoch 70/100, Loss: 5.8234
Epoch 80/100, Loss: 6.4078
Epoch 90/100, Loss: 6.8919
Epoch 100/100, Loss: 7.1989
Test Accuracy: 33.30%
