In [1]:
import numpy as np

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Load the MNIST dataset
data = np.load('mnist.npz')
X_train, y_train = data['x_train'], data['y_train']
X_test, y_test = data['x_test'], data['y_test']

# Normalize pixel values to [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

# Reshape data for the models
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)  # Adding channel dimension
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

# Print dataset details
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (60000, 28, 28, 1)
X_test shape: (10000, 28, 28, 1)
y_train shape: (60000,)
y_test shape: (10000,)


### Build and Train the Baseline Model

In [3]:
## prepare the dataset

# Flatten the training and test datasets
num_pixels = X_train.shape[1] * X_train.shape[2]  # 28*28 = 784
X_train_flattened = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
X_test_flattened = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')

# Normalize inputs from 0-255 to 0-1
X_train_flattened /= 255.0
X_test_flattened /= 255.0

# One-hot encode the labels
from tensorflow.keras.utils import to_categorical
y_train_one_hot = to_categorical(y_train, 10)
y_test_one_hot = to_categorical(y_test, 10)


In [4]:
# Define the baseline model
def baseline_model():
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(10, kernel_initializer='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build the model
model = baseline_model()

# Train the model
model.fit(X_train_flattened, y_train_one_hot, validation_data=(X_test_flattened, y_test_one_hot), epochs=10, batch_size=200, verbose=2)

# Save the trained model
model.save('baseline.h5')


Epoch 1/10
300/300 - 2s - loss: 1.5044 - accuracy: 0.6712 - val_loss: 0.7804 - val_accuracy: 0.8318
Epoch 2/10
300/300 - 1s - loss: 0.5964 - accuracy: 0.8558 - val_loss: 0.4614 - val_accuracy: 0.8827
Epoch 3/10
300/300 - 2s - loss: 0.4235 - accuracy: 0.8870 - val_loss: 0.3708 - val_accuracy: 0.9005
Epoch 4/10
300/300 - 2s - loss: 0.3620 - accuracy: 0.8996 - val_loss: 0.3345 - val_accuracy: 0.9040
Epoch 5/10
300/300 - 2s - loss: 0.3306 - accuracy: 0.9070 - val_loss: 0.3099 - val_accuracy: 0.9112
Epoch 6/10
300/300 - 1s - loss: 0.3099 - accuracy: 0.9117 - val_loss: 0.2951 - val_accuracy: 0.9159
Epoch 7/10
300/300 - 1s - loss: 0.2951 - accuracy: 0.9153 - val_loss: 0.2814 - val_accuracy: 0.9179
Epoch 8/10
300/300 - 1s - loss: 0.2824 - accuracy: 0.9186 - val_loss: 0.2713 - val_accuracy: 0.9237
Epoch 9/10
300/300 - 1s - loss: 0.2717 - accuracy: 0.9216 - val_loss: 0.2606 - val_accuracy: 0.9244
Epoch 10/10
300/300 - 1s - loss: 0.2613 - accuracy: 0.9246 - val_loss: 0.2533 - val_accuracy: 0.9265

In [5]:
# Evaluate the model
baseline_loss, baseline_accuracy = model.evaluate(X_test_flattened, y_test_one_hot, verbose=1)
print(f"Baseline Model - Loss: {baseline_loss}, Accuracy: {baseline_accuracy * 100:.2f}%")


Baseline Model - Loss: 0.2533239722251892, Accuracy: 92.65%
