### Running an MNIST model with regular NumPy weights, then with xlns weights

In [1]:
import get_mnist_data
import mlp_np

In [2]:
import numpy as np

In [3]:
%%time

# Fetch the four MNIST arrays (train/test images and labels) in NumPy format.
(
    mnist_train_images,
    mnist_train_labels,
    mnist_test_images,
    mnist_test_labels,
) = get_mnist_data.get_mnist_data_numpy_format()

# Sanity check: print one shape per line, in the order
# train images, train labels, test images, test labels.
# Expected: (60000, 28, 28), (60000,), (10000, 28, 28), (10000,)
print(
    *(
        mnist_array.shape
        for mnist_array in (
            mnist_train_images,
            mnist_train_labels,
            mnist_test_images,
            mnist_test_labels,
        )
    ),
    sep="\n",
)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)
CPU times: user 230 ms, sys: 55.7 ms, total: 286 ms
Wall time: 286 ms


In [4]:
# Check the raw pixel dtype (the recorded output shows uint8); later cells
# divide the images by 255.0 to normalize them before feeding the network.
print(mnist_train_images.dtype)

uint8


In [5]:
%%time

# Take the first MNIST training image as a single-row batch: (28, 28) -> (1, 784).
# reshape alone flattens the image, so no separate .flatten() call is needed.
first_image = mnist_train_images[0].reshape(1, 784)
first_image = first_image / 255.0  # Scale uint8 pixels (0-255) into [0, 1]; result is float

# Initialize weights from N(0, 0.1) with a fixed seed so reruns reproduce the same numbers.
# The extra row in each matrix (785 = 784 + 1, 101 = 100 + 1) is presumably the bias
# term handled inside mlp_np.feedforward -- TODO confirm against mlp_np.
rng = np.random.default_rng(0)
W1 = rng.normal(0, 0.1, (785, 100))  # Input -> Hidden
W2 = rng.normal(0, 0.1, (101, 10))   # Hidden -> Output

# Run the feedforward pass
Y_pred = mlp_np.feedforward(first_image, W1, W2)

# Print output probabilities
print(Y_pred)  # Shape: (1, 10) -- one row of class probabilities for digits 0-9

[[0.07280306 0.08740606 0.06117793 0.125009   0.10067844 0.12158931
  0.05928076 0.08761597 0.09419805 0.19024144]]
CPU times: user 12.1 ms, sys: 2.07 ms, total: 14.1 ms
Wall time: 13.5 ms


In [6]:
%%time

# ---- LOAD DATA ----
# Train on the first n_samples images (increase n_samples to use more data).
n_samples = 100
X_train = mnist_train_images[:n_samples].reshape(n_samples, 784) / 255.0  # Flatten + normalize to [0, 1]
Y_train = mnist_train_labels[:n_samples]  # Labels (0-9)

# ---- TRAIN MODEL ----
# Starts from the W1/W2 created in the previous cell and rebinds both names to the
# values returned by train_nn, so re-running this cell continues from the last
# result instead of from the initial weights.
W1, W2, losses = mlp_np.train_nn(X_train, Y_train, W1, W2, epochs=100, lr=0.01)

# ---- SANITY CHECK ON TRAINING DATA ----
# Compare predictions with the true labels for the first 20 training samples.
# These samples were used for training, so this is not a held-out test.
n_preview = 20
predicted_labels = mlp_np.predict(X_train[:n_preview], W1, W2)
print("Predicted Label:", predicted_labels)
print("True Label:", Y_train[:n_preview])

np.sum: -238.27300574407283
Epoch 0/100, Loss: 2.3827
np.sum: -166.77231781809587
np.sum: -166.87671806067607
np.sum: -127.10971021096915
np.sum: -88.32615405903822
np.sum: -63.58374563533892
np.sum: -39.272145220015034
np.sum: -29.050736159955285
np.sum: -26.804146582268835
np.sum: -33.176503315968034
np.sum: -24.664575943847268
Epoch 10/100, Loss: 0.2466
np.sum: -10.195587680850725
np.sum: -6.847881657402174
np.sum: -5.552969734005894
np.sum: -4.707957688507854
np.sum: -4.082115306754854
np.sum: -3.6021248711197686
np.sum: -3.223778084733282
np.sum: -2.915410099249192
np.sum: -2.6584040926002115
np.sum: -2.442802697116406
Epoch 20/100, Loss: 0.0244
np.sum: -2.2583773236109987
np.sum: -2.0985242581114263
np.sum: -1.9597724374363623
np.sum: -1.8381394286064106
np.sum: -1.7302977219948872
np.sum: -1.6340480653683502
np.sum: -1.5471444011534805
np.sum: -1.4685911984241027
np.sum: -1.3968338129451356
np.sum: -1.331552969075923
Epoch 30/100, Loss: 0.0133
np.sum: -1.2719941768867908
np.sum:

In [7]:
import xlns as xl

In [21]:
%%time

# Take the first MNIST training image as a single-row batch: (28, 28) -> (1, 784).
# reshape alone flattens the image, so no separate .flatten() call is needed.
first_image = mnist_train_images[0].reshape(1, 784)
first_image = first_image / 255.0  # Scale pixels into [0, 1]; this stays a float NumPy array

# Initialize weights from N(0, 0.1) with a fixed seed (reproducible across reruns),
# then convert them to the xlnsnp format.
rng = np.random.default_rng(0)
W1 = xl.xlnsnp(rng.normal(0, 0.1, (785, 100)))  # Input -> Hidden
W2 = xl.xlnsnp(rng.normal(0, 0.1, (101, 10)))   # Hidden -> Output

# Run the feedforward pass with xlnsnp weights and a NumPy input
Y_pred = mlp_np.feedforward(first_image, W1, W2)

# Print output probabilities
print(Y_pred)  # Shape: (1, 10) -- one row of class probabilities for digits 0-9 (xlnsnp elements)

nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
[[xlnsnp([xlns(0.0896660603893914)]) xlnsnp([xlns(0.10581346708038011)])
  xlnsnp([xlns(0.11242814195245047)]) xlnsnp([xlns(0.09837844510378241)])
  xlnsnp([xlns(0.09946285676511078)]) xlnsnp([xlns(0.09522885768438781)])
  xlnsnp([xlns(0.08919378748964538)]) xlnsnp([xlns(0.09498263379288038)])
  xlnsnp([xlns(0.11001047548333014)]) xlnsnp([xlns(0.1048352931814453)])]]
CPU times: user 4.56 s, sys: 7 ms, total: 4.56 s
Wall time: 4.56 s


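We can see that the basic feedforward pass works with weights in the xlnsnp format when the input (the first image) is a regular NumPy array.
Note that the raw MNIST pixels are uint8, but the image actually passed to `feedforward` has already been normalized to floats by dividing by 255.0. The xlns pass is also much slower than the pure NumPy one (about 4.56 s versus 13.5 ms wall time in the recorded runs).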

In [8]:
%%time

# ---- LOAD DATA ----
# Train on the first n_samples images (increase n_samples to use more data).
n_samples = 5
X_train = mnist_train_images[:n_samples].reshape(n_samples, 784) / 255.0  # Flatten + normalize to [0, 1]
Y_train = mnist_train_labels[:n_samples]  # Labels (0-9)

# Initialize weights from N(0, 0.1) with a fixed seed (reproducible across reruns),
# then convert them to the xlnsnp format.
rng = np.random.default_rng(0)
W1 = xl.xlnsnp(rng.normal(0, 0.1, (785, 100)))  # Input -> Hidden
W2 = xl.xlnsnp(rng.normal(0, 0.1, (101, 10)))   # Hidden -> Output

# ---- TRAIN MODEL ----
# The recorded run of this cell failed inside mlp_np.train_nn with
# "AttributeError: 'xlnsnp' object has no attribute 'T'". That failure is the point
# of the demonstration, so catch and display it instead of aborting a "Run All".
# If training fails, W1/W2 keep their initial values and `losses` is not set here.
try:
    W1, W2, losses = mlp_np.train_nn(X_train, Y_train, W1, W2, epochs=100, lr=0.01)
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")
else:
    # ---- SANITY CHECK ON TRAINING DATA ----
    # Compare predictions with the true labels for up to the first 20 training
    # samples (never more than n_samples, which is all that was loaded).
    n_preview = min(20, n_samples)
    predicted_labels = mlp_np.predict(X_train[:n_preview], W1, W2)
    print("Predicted Label:", predicted_labels)
    print("True Label:", Y_train[:n_preview])

nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
nonscalar comparison
np.sum: [xlns(-11.71025909405102)]


AttributeError: 'xlnsnp' object has no attribute 'T'

We can see that training failed: the code tried to transpose an xlnsnp object with `.T`, and xlnsnp has no `T` attribute.
This run used the defective `cross_entropy_loss_d`.