In [8]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from sklearn.neural_network import BernoulliRBM
from sklearn.metrics import accuracy_score, log_loss
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from tabulate import tabulate
import tensorflow as tf

**Step 1: Data Preparation**

– Scaling and Binarizing Fashion MNIST
We start by loading the Fashion MNIST dataset and reshaping the 28x28 images into 784-dimensional vectors. To prepare the data for training, we normalize the pixel values to the range [0, 1] using MinMaxScaler. Since RBMs work best with binary input, we further binarize the data—converting all values above 0.5 to 1 and the rest to 0. This prepares the dataset for effective feature learning with the RBM.

In [9]:
# Fetch Fashion MNIST; every 28x28 image becomes one 784-wide float32 row
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = (
    images.reshape(-1, 784).astype(np.float32) for images in (x_train, x_test)
)

# Min-max scale each pixel column; the ranges are learned from the training
# split only and then re-used unchanged on the test split
scaler = MinMaxScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Threshold the scaled pixels at 0.5 to obtain the 0/1 inputs used by the RBM
x_train_bin, x_test_bin = (
    np.greater(scaled, 0.5).astype(np.float32)
    for scaled in (x_train_scaled, x_test_scaled)
)

**Step 2: Feature Extraction**

– Training a Tuned RBM
In this step, we train a Bernoulli Restricted Boltzmann Machine (RBM) to uncover hidden features from the binarized Fashion MNIST data. We use a well-tuned configuration with 512 hidden units, a moderate learning rate, and a batch size of 64, training for 30 iterations. Once trained, the RBM transforms both the training and testing datasets into compact, meaningful latent representations. These new feature vectors will later serve as input for our classification model.

In [10]:
# -----------------------------
# Better Tuned RBM
# -----------------------------
# Hyperparameters are spelled out one per line so they are easy to tune;
# random_state pins the RBM's own sampling.
rbm = BernoulliRBM(
    n_components=512,
    learning_rate=0.01,
    batch_size=64,
    n_iter=30,
    verbose=True,
    random_state=42,
)
rbm.fit(x_train_bin)

# Project both splits onto the learned hidden units (train first, then test)
x_train_rbm, x_test_rbm = (
    rbm.transform(binary_split) for binary_split in (x_train_bin, x_test_bin)
)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -213.72, time = 27.60s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -189.50, time = 23.17s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -170.41, time = 21.75s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -160.33, time = 25.82s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -148.88, time = 23.52s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -141.76, time = 23.83s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -137.36, time = 21.85s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -132.35, time = 23.78s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -129.14, time = 23.53s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -125.03, time = 24.54s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -122.12, time = 23.28s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -121.54, time = 22.22s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -118.90, time = 23.58s
[BernoulliRBM] Iteration 14, pseudo-likelihood = -116.65, time = 23.92s
[

**Step 3: Classification**

– Training a Deep MLP on RBM Features
Now that we have meaningful features from the RBM, we use them to train a deep Multi-Layer Perceptron (MLP) classifier. The model includes dense layers with batch normalization, ReLU activations, and dropout for regularization. This setup helps the network learn effectively while reducing overfitting. We train the MLP on the RBM-transformed training data for 25 epochs, using a 10% validation split to monitor performance. This classifier learns to map the compressed RBM features to the correct fashion category labels.

In [11]:
# -----------------------------
# Deep MLP on RBM features
# -----------------------------
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation,
# dropout masks and batch shuffling are repeatable on a fresh kernel.
# NOTE: this also reseeds the global `random` and `numpy.random` generators.
tf.keras.utils.set_random_seed(42)

mlp_rbm = Sequential([
    # Input width follows the RBM output, so changing the RBM's n_components
    # does not require editing this cell.
    Input(shape=(x_train_rbm.shape[1],)),
    Dense(256), BatchNormalization(), tf.keras.layers.ReLU(),
    Dropout(0.3),
    Dense(128), BatchNormalization(), tf.keras.layers.ReLU(),
    Dropout(0.3),
    # 10 softmax outputs, one per class
    Dense(10, activation='softmax')
])
# The sparse loss takes integer class ids directly (no one-hot encoding of y_train).
mlp_rbm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Keep the training curves in a variable: they stay available for later
# inspection and the cell no longer ends by echoing a bare History repr.
history_rbm = mlp_rbm.fit(
    x_train_rbm, y_train,
    epochs=25, batch_size=128, validation_split=0.1, verbose=1,
)

Epoch 1/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.6988 - loss: 0.8628 - val_accuracy: 0.8063 - val_loss: 0.5188
Epoch 2/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.8030 - loss: 0.5431 - val_accuracy: 0.8207 - val_loss: 0.4806
Epoch 3/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.8141 - loss: 0.5062 - val_accuracy: 0.8240 - val_loss: 0.4612
Epoch 4/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8234 - loss: 0.4796 - val_accuracy: 0.8297 - val_loss: 0.4625
Epoch 5/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.8332 - loss: 0.4560 - val_accuracy: 0.8362 - val_loss: 0.4400
Epoch 6/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.8330 - loss: 0.4498 - val_accuracy: 0.8275 - val_loss: 0.4753
Epoch 7/25
[1m422/422[0m

<keras.src.callbacks.history.History at 0x791695bac090>

**Step 4: Baseline**

– Training a Raw MLP Classifier
To fairly evaluate the benefit of RBM-based feature extraction, we train a baseline MLP classifier directly on the raw, scaled pixel data (without the RBM). Note that this baseline uses wider hidden layers (512 and 256 units, compared with 256 and 128 in the RBM-based MLP), so the two classifiers are similar in design but not identical in capacity. We again apply batch normalization, ReLU activation, and dropout to improve learning and generalization. Training follows the same schedule—25 epochs with a batch size of 128 and 10% of the data reserved for validation—allowing for a consistent comparison.

In [12]:
# -----------------------------
# Raw MLP for comparison
# -----------------------------
# Seed Python, NumPy and TensorFlow in one call so that the baseline's weight
# initialisation, dropout masks and batch shuffling are repeatable, whatever
# was executed before this cell.
# NOTE: this also reseeds the global `random` and `numpy.random` generators.
tf.keras.utils.set_random_seed(42)

mlp_raw = Sequential([
    # Input width is taken from the data instead of a hard-coded 784.
    Input(shape=(x_train_scaled.shape[1],)),
    Dense(512), BatchNormalization(), tf.keras.layers.ReLU(),
    Dropout(0.3),
    Dense(256), BatchNormalization(), tf.keras.layers.ReLU(),
    Dropout(0.3),
    # 10 softmax outputs, one per class
    Dense(10, activation='softmax')
])
# The sparse loss takes integer class ids directly (no one-hot encoding of y_train).
mlp_raw.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Keep the training curves in a variable: they stay available for later
# inspection and the cell no longer ends by echoing a bare History repr.
history_raw = mlp_raw.fit(
    x_train_scaled, y_train,
    epochs=25, batch_size=128, validation_split=0.1, verbose=1,
)

Epoch 1/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.7684 - loss: 0.6601 - val_accuracy: 0.8373 - val_loss: 0.4379
Epoch 2/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - accuracy: 0.8544 - loss: 0.3961 - val_accuracy: 0.8637 - val_loss: 0.3659
Epoch 3/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - accuracy: 0.8750 - loss: 0.3481 - val_accuracy: 0.8673 - val_loss: 0.3540
Epoch 4/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - accuracy: 0.8840 - loss: 0.3143 - val_accuracy: 0.8782 - val_loss: 0.3290
Epoch 5/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.8904 - loss: 0.2963 - val_accuracy: 0.8760 - val_loss: 0.3274
Epoch 6/25
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.8923 - loss: 0.2886 - val_accuracy: 0.8747 - val_loss: 0.3369
Epoch 7/25
[1m42

<keras.src.callbacks.history.History at 0x79177c0711d0>

**Step 6: Result Summary**

– Tabulating Model Performance
To clearly visualize and compare the effectiveness of both models, we present their evaluation metrics in a table. This includes the test loss and accuracy for:

- The MLP trained on RBM-extracted features
- The MLP trained on raw image data

This summary provides a direct comparison to assess the impact of using unsupervised feature learning with an RBM on classification performance.

In [16]:
# -----------------------------
# Evaluation
# -----------------------------
def _score_on_test(model, features, labels):
    """Score a fitted classifier on a held-out split.

    Parameters
    ----------
    model : object with a ``predict(features)`` method that returns one row of
        class probabilities per sample.
    features : array-like passed unchanged to ``model.predict``.
    labels : array-like of integer class ids, one per row of ``features``.

    Returns
    -------
    tuple
        ``(probabilities, log_loss, accuracy)``.
    """
    probs = model.predict(features)
    # Give log_loss the full class list explicitly. Left to infer it from
    # `labels`, it errors whenever the evaluated split is missing a class.
    class_ids = np.arange(probs.shape[1])
    loss = log_loss(labels, probs, labels=class_ids)
    acc = accuracy_score(labels, np.argmax(probs, axis=1))
    return probs, loss, acc


# Same scoring routine for both models, so the two rows are directly comparable
rbm_preds, rbm_loss, rbm_acc = _score_on_test(mlp_rbm, x_test_rbm, y_test)
raw_preds, raw_loss, raw_acc = _score_on_test(mlp_raw, x_test_scaled, y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


**Step 5: Evaluation**

– Comparing the Two Models
Finally, we evaluate both classifiers on the test set to measure their performance. We calculate:

- **Log Loss**, which reflects how well the predicted probabilities match the true labels (lower is better).
- **Accuracy**, the proportion of correct predictions.

The RBM-based MLP uses the hidden feature representation, while the raw MLP uses the original scaled pixel data. By comparing these metrics, we can see whether the RBM's unsupervised feature learning provides a meaningful boost to classification performance.

In [17]:
# Comparison Table
# One (label, loss, accuracy) triple per model; the header row goes first
# because tabulate is told to read its headers from the first row.
model_scores = (
    ("MLP with RBM Features", rbm_loss, rbm_acc),
    ("MLP with Raw Images", raw_loss, raw_acc),
)
results = [["Model", "Test Loss", "Test Accuracy"]]
results.extend(
    [label, f"{loss:.4f}", f"{acc*100:.2f}%"] for label, loss, acc in model_scores
)

print(tabulate(results, headers="firstrow", tablefmt="fancy_grid"))


╒═══════════════════════╤═════════════╤═════════════════╕
│ Model                 │   Test Loss │ Test Accuracy   │
╞═══════════════════════╪═════════════╪═════════════════╡
│ MLP with RBM Features │      0.4354 │ 84.76%          │
├───────────────────────┼─────────────┼─────────────────┤
│ MLP with Raw Images   │      0.3432 │ 88.98%          │
╘═══════════════════════╧═════════════╧═════════════════╛
