In [None]:
%matplotlib inline

# Load the modules
import pickle
import math

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt

# Reload the dataset splits stored in the pickle file.
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
  pickle_data = pickle.load(f)

# Bind each split (features + labels) to its own pair of names.
train_features, train_labels = pickle_data['train_dataset'], pickle_data['train_labels']
valid_features, valid_labels = pickle_data['valid_dataset'], pickle_data['valid_labels']
test_features, test_labels = pickle_data['test_dataset'], pickle_data['test_labels']
del pickle_data  # Drop the container reference; the splits stay reachable via the names above


print('Data and modules loaded.')

2025-11-02 13:19:59.409826: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762085999.433391   37779 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762085999.441627   37779 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762085999.467367   37779 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762085999.467435   37779 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762085999.467443   37779 computation_placer.cc:177] computation placer alr

In [None]:
# Data has been normalized
# Number of training examples (first axis of the training array).
total_samples = train_features.shape[0]
print(total_samples)

# Hyperparameters
learning_rate = 0.01
num_output = 10
epochs = 5
batch_size = 256
# Number of batches needed to cover total_samples once, rounded up.
steps_per_epoch = math.ceil(total_samples / batch_size)
print(steps_per_epoch)

In [None]:
# Cleaning Data
# Source dataset for training: one element per (features, label) pair.
train_data = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
# Cast the evaluation features to float32. The validation cast is bound to
# `valid_features`, the name the training and evaluation cells read; it used to be
# bound to `val_features`, which none of those cells use, so the cast had no effect.
valid_features = np.array(valid_features, np.float32)
test_features = np.array(test_features, np.float32)
val_features = valid_features  # alias kept so any code using the old name still works

# Data has been normalized

In [None]:
# Build the training input pipeline directly from the source arrays so this cell is
# safe to re-run: chaining onto an already repeated/batched `train_data` would stack
# a second batch dimension on top of the first.
# shuffle() comes before repeat() so each pass over the data is shuffled on its own
# and samples from different epochs are not mixed in the shuffle buffer.
train_data = (
    tf.data.Dataset.from_tensor_slices((train_features, train_labels))
    .shuffle(5000)
    .repeat()
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Size of the second axis of the training array; used as the row count of the weight matrix W.
# NOTE(review): assumes train_features is 2-D (samples, features) — confirm against the pickle.
features = train_features.shape[1]

In [None]:
# Trainable parameters of the single-layer softmax classifier.
# `name` is an argument of tf.Variable; passing it to the initializer op only names
# that temporary tensor and leaves the variable itself with a default name.
W = tf.Variable(tf.random.normal([features, num_output]), name="weights")
B = tf.Variable(tf.zeros([num_output]), name="bias")

# Stochastic gradient descent with the configured learning rate.
optimizer = tf.optimizers.SGD(learning_rate)

# Model
def model(X):
    """Return class probabilities for X: softmax applied to the affine map X @ W + B."""
    logits = tf.add(tf.matmul(X, W), B)
    return tf.nn.softmax(logits)

def cross_entropy(y_pred, y_true):
    """Mean categorical cross-entropy between predicted probabilities and one-hot targets.

    Args:
        y_pred: Predicted class probabilities, one row per sample (as returned by `model`).
        y_true: One-hot targets with the same shape as `y_pred`. The labels were one-hot
            encoded before being stored in the pickle, so no encoding happens here.

    Returns:
        Scalar tensor: per-sample cross-entropy averaged over the batch.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align the label dtype with the predictions so the element-wise product below
    # does not fail when the two arrive with different dtypes.
    y_true = tf.cast(y_true, y_pred.dtype)
    # A probability of exactly 0 gives log(0) = -inf, and 0 * -inf = NaN, which would
    # poison the loss and its gradients. Keep the log argument strictly positive.
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.0)
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred), 1))

def accuracy(y_pred, y_true):
    """Fraction of samples whose predicted class equals the true class.

    Args:
        y_pred: Per-class scores or probabilities, one row per sample.
        y_true: Either one-hot labels (same shape as `y_pred`) or class indices
            (shape `(N,)` or `(N, 1)`).

    Returns:
        Scalar float32 tensor in [0, 1].
    """
    y_true = tf.convert_to_tensor(y_true)
    if len(y_true.shape) > 1 and y_true.shape[1] > 1:
        # One-hot rows -> class indices.
        y_true = tf.argmax(y_true, axis=1)
    else:
        # Flatten so an (N, 1) column does not broadcast against the (N,) predictions.
        y_true = tf.reshape(y_true, [-1])
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    # Mean of the 0/1 hit indicator is the accuracy. The previous expression returned
    # the negated *count* of correct predictions instead.
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

def run_optimizer(X, Y):
    """Apply one optimizer step to W and B using the cross-entropy loss on batch (X, Y).

    Returns None; the update happens in place on the module-level variables.
    """
    params = [W, B]
    # Record the forward pass so the loss can be differentiated w.r.t. the parameters.
    with tf.GradientTape() as tape:
        loss = cross_entropy(model(X), Y)

    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))

def batch_data(X, Y, batch_size):
    """Yield consecutive, non-overlapping (batch_X, batch_Y) slices of X and Y.

    Args:
        X: Sliceable sequence of features.
        Y: Sliceable sequence of labels, aligned with X.
        batch_size: Maximum number of items per batch; must be positive.

    Yields:
        Tuples `(X[start:end], Y[start:end])` covering X in order. The final batch
        is shorter when len(X) is not a multiple of batch_size.

    Raises:
        ValueError: If batch_size is not positive (raised when iteration starts).
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    # range() already advances by batch_size, so its value is the slice start itself.
    # Multiplying it by batch_size again skipped most of the data and yielded empty batches.
    for start in range(0, len(X), batch_size):
        end = start + batch_size
        yield X[start:end], Y[start:end]

In [None]:
# Training
for epoch in range(1, epochs + 1):
    # train_data repeats indefinitely, so take() bounds the number of batches per epoch.
    for batch_X, batch_Y in train_data.take(steps_per_epoch):
        run_optimizer(batch_X, batch_Y)

    # End-of-epoch report: loss on the full validation set, and on the last training
    # batch only (not the whole training set).
    val_pred = model(valid_features)
    val_loss = cross_entropy(val_pred, valid_labels)
    train_pred = model(batch_X)
    train_loss = cross_entropy(train_pred, batch_Y)
    print(f"Epoch: {epoch}, train_loss {train_loss}, val_loss {val_loss}")

In [8]:
# Evaluate the trained model on the test split.
test_pred = model(test_features)
test_loss = cross_entropy(test_pred, test_labels)
test_acc = accuracy(test_pred, test_labels)
# Report the metrics computed above. The previous message printed train_acc / val_acc,
# which the training cell never assigns, and did not show the test results at all.
print(f"test_loss {test_loss}, test_accuracy {test_acc}")

2025-11-02 13:16:40.973151: W tensorflow/core/framework/op_kernel.cc:1844] INVALID_ARGUMENT: required broadcastable shapes
2025-11-02 13:16:40.973249: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: {{function_node __wrapped__Equal_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:Equal] name: 

In [None]:
# Validation metrics, with a shape check before scoring.
val_pred = model(valid_features)
val_loss = cross_entropy(val_pred, valid_labels)
# Show both shapes so a row-count mismatch is visible before accuracy() compares them.
print(val_pred.shape)
print(valid_labels.shape)
val_acc = accuracy(val_pred, valid_labels)
# Display the results; they were previously computed and then discarded.
print(f"val_loss {val_loss}, val_accuracy {val_acc}")