In [1]:
# Importing required libraries

import tensorflow as tf
import numpy as np

from tensorflow.keras import Model, layers
from sklearn.model_selection import train_test_split

2025-11-04 07:59:16.356715: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762239556.372613    3013 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762239556.377714    3013 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762239556.390426    3013 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762239556.390454    3013 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1762239556.390456    3013 computation_placer.cc:177] computation placer alr

In [2]:
from tensorflow.keras.datasets import mnist

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to numpy float32
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Normalize data between 0 and 1
x_train, x_test = x_train/255, x_test/255

# splitting Data
x_train, valid_data, y_train, valid_labels = train_test_split(
    x_train, y_train,
    test_size = 0.25,
    random_state = 42,
    shuffle=True
)


In [3]:
# Checking data

x_train = tf.pad(x_train, paddings=[[0, 0], [2, 2], [2, 2]], mode="CONSTANT", constant_values=0)
x_test = tf.pad(x_test, paddings=[[0, 0], [2, 2], [2, 2]], mode="CONSTANT", constant_values=0)

print(x_train.shape)

(45000, 32, 32)


I0000 00:00:1762239561.121778    3013 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6096 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2070 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 7.5


In [4]:
print(x_train[0].shape)

(32, 32)


In [5]:
# Parameters 
total_samples = len(x_train)

train_size = int(0.75 * len(x_train))
num_classes = 10

batch_size = 125
learning_rate = 0.001
epochs = 10
steps_per_epoch = int(np.ceil(total_samples / batch_size))

conv_layer1 = 6
conv_layer2 = 16

In [6]:
# Data batching, splitting and shuffle

train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(5000).repeat()

#train_data = train_data.take(train_size)
#valid_data = train_data.skip(train_size)

train_data = train_data.batch(batch_size).prefetch(tf.data.AUTOTUNE)
#valid_data = valid_data.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [17]:
# Defining Model
class LeNet(tf.Module):
    def __init__(self):
        super(LeNet, self).__init__()

        # Convolutional Layer 1
        self.w1 = tf.Variable(tf.random.normal([3, 3, 1, conv_layer1]), name="weight1", trainable=True)
        self.b1 = tf.Variable(tf.zeros([conv_layer1]), name="bias1", trainable=True)

        # Convolutional Layer 2
        self.w2 = tf.Variable(tf.random.normal([3, 3, conv_layer1, conv_layer2]), name="weight2", trainable=True)
        self.b2 = tf.Variable(tf.zeros([conv_layer2]), name="bias2", trainable=True)

        # Fully connected layer 1
        self.w3 = tf.Variable(tf.random.normal([5*5*conv_layer2, 120]), name="weight3", trainable=True)
        self.b3 = tf.Variable(tf.zeros([120]), name="bias3", trainable=True)

        # Fully connected layer 2
        self.w4 = tf.Variable(tf.random.normal([120, 84]), name="weight4", trainable=True)
        self.b4 = tf.Variable(tf.zeros([84]), name="bias4", trainable=True)

        # Output Layer
        self.w5 = tf.Variable(tf.random.normal([84, 10]), name="weight5", trainable=True)
        self.b5 = tf.Variable(tf.zeros([10]), name="bias5", trainable=True)

        
    def conv2d(self, x, filter_W, bias_conv, stride=2, padding="VALID"):
        conv_layer = tf.nn.conv2d(
            x, filter_W,
            strides = [1, stride, stride, 1],
            padding = padding
        )
        conv_layer = tf.nn.bias_add(conv_layer, bias_conv)
        conv_layer = tf.nn.relu(conv_layer)
        return conv_layer

    def maxpool2d(self, x, k=2):
        return tf.nn.max_pool(x, k=[1, k, k, 1], strides=[1, k, k, 1], padding="SAME")
    
    def __call__(self, x, is_training=False):
        
        # Layer 1
        conv1 = self.conv2d(x, self.w1, self.b1)
        conv1 = self.maxpool2d(conv1, k=2)

        # Layer 2
        conv2 = self.conv2d(conv1, self.w2, self.b2)
        conv2 = self.maxpool2d(conv2, k=2)

        # Flatten Layer 
        flatten = tf.reshape(conv2, [-1, self.w3.get_shape().as_list(0)[0]])

        # Fully connected layer 1
        fc1 = tf.nn.bias_add(tf.matmul(flatten, self.w3), self.b3)
        fc1 = tf.nn.relu(fc1)

        # Fullly connected layer 2
        fc2 = tf.nn.bias_add(tf.matmul(fc1, self.w4), self.b4)
        fc2 = tf.nn.relu(fc2)

        # Output layer
        out = tf.nn.bias_add(tf.matmul(fc2, self.w5), self.b5)

        if not is_training:
            return tf.nn.softmax(out)

        return out

In [18]:
optimizer = tf.optimizers.Adam(learning_rate)
model = LeNet()

In [19]:
def cross_entropy(pred, y_true):
    y_true = tf.cast(y_true, tf.int64)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=x)
    return tf.reduce_mean(loss)

def accuracy(pred, y_true):
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)

In [20]:
def run_optimization(X, Y):
    with tf.GradientTape() as g:
        logits = model(X, is_training=True)
        loss = cross_entropy(logits, Y)

    gradient = g.gradients(loss, model.trainable_variables)
    optimizer.apply_gradients(gradient, model.trainable_variables)

In [21]:
for epoch in range(epochs + 1):
    for step, (batch_X, batch_Y) in enumerate(train_data.take(steps_per_epoch), 1):
        run_optimization(batch_X, batch_Y)

    train_pred = model(batch_X, is_training=True)
    train_loss = cross_entropy(train_pred, batch_Y)
    train_acc = accuracy(train_pred, batch_Y)
    valid_pred = model(valid_data, is_training=True)
    valid_loss = cross_entropy(valid_pred, valid_labels)
    valid_acc = accuracy(valid_pred, valid_labels)

    print(f"Epoch {epoch}, train_loss {train_loss}, train_accuracy{train_acc}, val_loss{val_loss}, val_accuracy{val_acc}")

ValueError: Attempt to convert a value (_TupleWrapper((<tf.Variable 'weight1:0' shape=(3, 3, 1, 6) dtype=float32, numpy=
array([[[[-1.0434712 , -0.12306508,  0.7636716 , -0.73300934,
          -1.2034198 , -1.1468683 ]],

        [[-1.3701671 ,  0.2196709 ,  0.481947  ,  0.667011  ,
          -0.6440105 , -0.1546383 ]],

        [[ 0.23313454, -0.05384781, -1.4789863 , -1.7302068 ,
          -0.51457494,  1.3431426 ]]],


       [[[ 0.29648885,  1.0678004 ,  0.8219487 , -0.8838747 ,
           0.33531335, -0.9067335 ]],

        [[-1.8494409 ,  1.2814767 ,  0.15951508,  0.68538016,
           0.07038898, -1.4757422 ]],

        [[-0.7577252 ,  0.57884985,  0.70002645, -0.11738571,
           0.35614645,  0.79097   ]]],


       [[[-0.27882332, -1.6731254 ,  0.959114  , -0.6997842 ,
           0.30457515,  0.6272917 ]],

        [[-0.7665482 ,  0.7495768 ,  0.24067816, -0.01117166,
          -0.02441178,  0.13706195]],

        [[ 1.1451366 ,  0.04250425, -1.9980545 , -0.7088995 ,
          -0.7577295 ,  1.6958355 ]]]], dtype=float32)>,))) with an unsupported type (<class 'tensorflow.python.trackable.data_structures._TupleWrapper'>) to a Tensor.