In [1]:
%matplotlib inline

# Load the modules
import pickle
import math

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt

# Reload the data
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only open pickle files that come from a trusted source.
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    pickle_data = pickle.load(f)

# Pull each split out of the loaded dict under the keys it was stored with.
train_features, train_labels = pickle_data['train_dataset'], pickle_data['train_labels']
valid_features, valid_labels = pickle_data['valid_dataset'], pickle_data['valid_labels']
test_features, test_labels = pickle_data['test_dataset'], pickle_data['test_labels']
del pickle_data  # Free up memory

print('Data and modules loaded.')

2025-11-02 13:46:49.739810: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762087609.762816   47285 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762087609.769045   47285 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762087609.785778   47285 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762087609.785804   47285 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762087609.785807   47285 computation_placer.cc:177] computation placer alr

Data and modules loaded.


In [2]:
# Data has already been normalized, so only the sample count is needed here.
total_samples = len(train_features)

# Hyperparameters
learning_rate, num_output = 0.01, 10
epochs, batch_size = 100, 256

# Mini-batches per epoch: training-set size divided by batch size, rounded up.
steps_per_epoch = math.ceil(total_samples / batch_size)

In [3]:
# Cleaning Data
# Labels are used exactly as loaded; the conversion below is intentionally disabled.
#valid_labels, test_labels = np.array(valid_labels, np.int64), np.array(test_labels, np.int64)

# Wrap the training arrays in a tf.data pipeline source (one element per sample).
train_data = tf.data.Dataset.from_tensor_slices((train_features, train_labels))

# Cast the evaluation features to float32 under the names the training and
# test cells actually read (`valid_features`, `test_features`). Previously the
# validation cast was stored only in `val_features`, which nothing else uses,
# so the array fed to the model was never the converted one.
valid_features = np.array(valid_features, np.float32)
test_features = np.array(test_features, np.float32)
val_features = valid_features  # alias kept for the name this cell used to define

# Data has been normalized

I0000 00:00:1762087623.746820   47285 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6096 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 7.5


In [4]:
# Input pipeline: repeat indefinitely, shuffle through a 5000-element buffer,
# group into mini-batches of `batch_size`, and prefetch upcoming batches.
# NOTE: this rebinds `train_data` to a pipeline built from its previous value,
# so running this cell twice without first re-running the cell that creates
# `train_data` stacks these transformations a second time.
train_data = (
    train_data
    .repeat()
    .shuffle(5000)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

In [5]:
# Size of the second axis of train_features; used as the row count of the
# weight matrix W created in the next cell.
features = train_features.shape[1]

In [6]:
# Trainable parameters of the linear classifier.
# `name` is passed to tf.Variable itself so the variables are actually called
# "weights" / "bias"; passing it to the initializer call (tf.random.normal /
# tf.zeros) only names that op and leaves the variable with a default name.
# W is drawn from a normal distribution with no explicit seed, so its initial
# values differ between runs.
W = tf.Variable(tf.random.normal([features, num_output]), name="weights")
B = tf.Variable(tf.zeros([num_output]), name="bias")

# Plain stochastic gradient descent with the configured learning rate.
optimizer = tf.optimizers.SGD(learning_rate)

# Model
def model(X):
    """Apply the linear layer (X @ W + B) followed by softmax.

    Reads the module-level variables W and B. Note that the returned values
    are softmax outputs, not raw pre-softmax scores.
    """
    scores = tf.matmul(X, W) + B
    return tf.nn.softmax(scores)

def cross_entropy(y_pred, y_true):
    """Mean cross-entropy between predicted probabilities and targets.

    y_pred is clipped to [1e-9, 1] before the logarithm so log(0) never occurs.
    The per-row loss is -sum(y_true * log(y_pred)) over axis 1, and the result
    is the mean of those row losses.
    """
    # Labels were one-hot encoded before being stored in the pickle, so no
    # tf.one_hot conversion is applied here.
    safe_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    per_row_loss = -tf.reduce_sum(y_true * tf.math.log(safe_pred), axis=1)
    return tf.reduce_mean(per_row_loss)

def accuracy(y_pred, y_true):
    """Fraction of rows whose arg-max prediction equals the target class.

    y_true may be given either as class indices or as a 2-D array with more
    than one column; in the latter case it is reduced to indices with argmax
    along axis 1 before comparison.
    """
    has_class_axis = len(y_true.shape) > 1 and y_true.shape[1] > 1
    labels = tf.argmax(y_true, axis=1) if has_class_axis else y_true
    predicted = tf.argmax(y_pred, axis=1)
    hits = tf.equal(predicted, tf.cast(labels, tf.int64))
    return tf.reduce_mean(tf.cast(hits, tf.float32))

def run_optimizer(X, Y):
    """Run one gradient-descent update of W and B on a single batch.

    The forward pass and loss are recorded on a GradientTape, gradients of the
    loss with respect to W and B are computed, and the module-level optimizer
    applies them. Returns None.
    """
    params = [W, B]
    with tf.GradientTape() as tape:
        # model() returns softmax probabilities, which cross_entropy expects.
        probs = model(X)
        loss = cross_entropy(probs, Y)

    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))

def batch_data(X, Y, batch_size):
    """Yield consecutive (X, Y) mini-batches of at most `batch_size` items.

    Args:
        X: Sliceable sequence of inputs; must support len() and slicing.
        Y: Sliceable sequence of targets, sliced with the same offsets as X.
        batch_size: Number of items per batch; also the stride between
            consecutive batch start offsets.

    Yields:
        Tuples (batch_X, batch_Y) covering X and Y in order without gaps or
        overlap. The final batch is shorter when len(X) is not a multiple of
        batch_size. Nothing is yielded for empty input.
    """
    # range() already advances by batch_size, so each value is the start
    # offset itself. Multiplying it by batch_size again (as before) skipped
    # most of the data and produced empty trailing batches.
    for start in range(0, len(X), batch_size):
        end = start + batch_size
        yield X[start:end], Y[start:end]

In [None]:
# Training
for epoch in range(1, epochs + 1):
    # Draw `steps_per_epoch` mini-batches from the input pipeline and apply
    # one optimizer update per batch.
    epoch_batches = train_data.take(steps_per_epoch)
    for step, (batch_X, batch_Y) in enumerate(epoch_batches, start=1):
        run_optimizer(batch_X, batch_Y)

    # Training metrics are measured on the last mini-batch of the epoch only
    # (batch_X / batch_Y still hold it after the loop), so they fluctuate more
    # than the validation metrics, which use the full validation split.
    train_pred = model(batch_X)
    train_loss, train_acc = cross_entropy(train_pred, batch_Y), accuracy(train_pred, batch_Y)

    val_pred = model(valid_features)
    val_loss, val_acc = cross_entropy(val_pred, valid_labels), accuracy(val_pred, valid_labels)

    print(f"Epoch: {epoch}, train_loss {train_loss}, train_accuracy {train_acc}, val_loss {val_loss}, val_accuracy {val_acc}")

2025-11-02 13:47:18.007478: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 1, train_loss 5.467658996582031, train_accuracy 0.37109375, val_loss 5.660613536834717, val_accuracy 0.3792000114917755


2025-11-02 13:47:25.665420: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 2, train_loss 4.300259113311768, train_accuracy 0.50390625, val_loss 4.0296125411987305, val_accuracy 0.5184000134468079
Epoch: 3, train_loss 3.0464181900024414, train_accuracy 0.62109375, val_loss 3.383882761001587, val_accuracy 0.5858666896820068


2025-11-02 13:47:42.095806: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 4, train_loss 2.7974941730499268, train_accuracy 0.62109375, val_loss 3.040773391723633, val_accuracy 0.6255999803543091
Epoch: 5, train_loss 2.7905006408691406, train_accuracy 0.63671875, val_loss 2.826641082763672, val_accuracy 0.6516000032424927
Epoch: 6, train_loss 2.7243781089782715, train_accuracy 0.6640625, val_loss 2.6814167499542236, val_accuracy 0.669866681098938
Epoch: 7, train_loss 2.5910520553588867, train_accuracy 0.6875, val_loss 2.5707082748413086, val_accuracy 0.6845333576202393


2025-11-02 13:48:19.194709: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 8, train_loss 2.1209535598754883, train_accuracy 0.7265625, val_loss 2.485013484954834, val_accuracy 0.6941333413124084
Epoch: 9, train_loss 2.723581075668335, train_accuracy 0.65625, val_loss 2.417388439178467, val_accuracy 0.7022666931152344
Epoch: 10, train_loss 2.4926133155822754, train_accuracy 0.65234375, val_loss 2.3597350120544434, val_accuracy 0.7094666957855225
Epoch: 11, train_loss 1.890216588973999, train_accuracy 0.765625, val_loss 2.31148362159729, val_accuracy 0.7142666578292847
Epoch: 12, train_loss 2.3173141479492188, train_accuracy 0.69921875, val_loss 2.268470287322998, val_accuracy 0.718666672706604
Epoch: 13, train_loss 2.065749406814575, train_accuracy 0.73046875, val_loss 2.229400634765625, val_accuracy 0.7225333452224731
Epoch: 14, train_loss 2.404020309448242, train_accuracy 0.6953125, val_loss 2.1946511268615723, val_accuracy 0.7258666753768921
Epoch: 15, train_loss 2.2323250770568848, train_accuracy 0.765625, val_loss 2.1631836891174316, val_accuracy 0

2025-11-02 13:49:24.145596: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 16, train_loss 2.0672905445098877, train_accuracy 0.70703125, val_loss 2.134279489517212, val_accuracy 0.7318666577339172
Epoch: 17, train_loss 1.9959032535552979, train_accuracy 0.75390625, val_loss 2.108015537261963, val_accuracy 0.7328000068664551
Epoch: 18, train_loss 2.1770858764648438, train_accuracy 0.74609375, val_loss 2.083986282348633, val_accuracy 0.734666645526886
Epoch: 19, train_loss 2.062645673751831, train_accuracy 0.71875, val_loss 2.0608623027801514, val_accuracy 0.7366666793823242
Epoch: 20, train_loss 2.0989482402801514, train_accuracy 0.73046875, val_loss 2.0385704040527344, val_accuracy 0.7405333518981934
Epoch: 21, train_loss 1.4621585607528687, train_accuracy 0.76953125, val_loss 2.0174548625946045, val_accuracy 0.7427999973297119
Epoch: 22, train_loss 2.478933572769165, train_accuracy 0.73046875, val_loss 1.9979379177093506, val_accuracy 0.744533360004425
Epoch: 23, train_loss 1.8542311191558838, train_accuracy 0.76171875, val_loss 1.980762004852295, val

2025-11-02 13:51:30.568908: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 32, train_loss 1.9041088819503784, train_accuracy 0.76953125, val_loss 1.842910885810852, val_accuracy 0.7537333369255066
Epoch: 33, train_loss 1.6829757690429688, train_accuracy 0.765625, val_loss 1.8304977416992188, val_accuracy 0.7544000148773193
Epoch: 34, train_loss 1.3945703506469727, train_accuracy 0.79296875, val_loss 1.8183270692825317, val_accuracy 0.7531999945640564
Epoch: 35, train_loss 1.7569317817687988, train_accuracy 0.73828125, val_loss 1.8062610626220703, val_accuracy 0.7558666467666626
Epoch: 36, train_loss 1.6796636581420898, train_accuracy 0.765625, val_loss 1.7944135665893555, val_accuracy 0.755466639995575
Epoch: 37, train_loss 2.2555882930755615, train_accuracy 0.703125, val_loss 1.783262848854065, val_accuracy 0.7567999958992004
Epoch: 38, train_loss 1.4571499824523926, train_accuracy 0.78125, val_loss 1.7719125747680664, val_accuracy 0.7567999958992004
Epoch: 39, train_loss 1.834991693496704, train_accuracy 0.765625, val_loss 1.7616252899169922, val_acc

2025-11-02 13:55:51.314233: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch: 64, train_loss 1.788510799407959, train_accuracy 0.7265625, val_loss 1.5508918762207031, val_accuracy 0.7635999917984009
Epoch: 65, train_loss 1.0359854698181152, train_accuracy 0.8203125, val_loss 1.5447192192077637, val_accuracy 0.7633333206176758
Epoch: 66, train_loss 1.6067813634872437, train_accuracy 0.76171875, val_loss 1.539259433746338, val_accuracy 0.7627999782562256


In [None]:
# Final evaluation: one forward pass over the whole test split, then loss and
# accuracy against the test labels.
test_pred = model(test_features)
test_loss, test_acc = cross_entropy(test_pred, test_labels), accuracy(test_pred, test_labels)
print(f"Test_loss {test_loss}, Test_accuracy {test_acc}")