In [None]:
import numpy as np
from keras.datasets import mnist
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense # type: ignore 
from tensorflow.keras.activations import linear, relu, sigmoid # type: ignore 
import matplotlib.pyplot as plt

In [None]:
# Fetch MNIST through Keras, then unpack each split into its image array
# and its label array.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [None]:
# Peek at the very first training sample: raw pixel grid, label and shape.
first_image, first_label = train_images[0], train_labels[0]
print('First image (as array):\n', first_image)
print('First label:', first_label)
print('Image shape:', first_image.shape)

In [None]:
# Render the first training sample in grayscale, titled with its label.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(train_images[0], cmap='gray')
ax.set_title(f'First Image - Label: {train_labels[0]}')
ax.axis('off')
plt.show()

In [None]:
# Preprocess the data
# Scale only while an array still holds integer pixels (assumed 0-255). The
# names are rebound in place, so without this check re-running the cell would
# divide the already-normalised floats by 255 a second time.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images.astype('float32') / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images.astype('float32') / 255.0

# Flatten images from (28, 28) to (784,), the input width the model expects.
# Reshaping an array that is already (N, 784) leaves it unchanged, so this
# step is safe to repeat as well.
train_images = train_images.reshape((train_images.shape[0], 784))
test_images = test_images.reshape((test_images.shape[0], 784))

# Sanity check: shapes and the resulting value range.
print(f"Training data shape: {train_images.shape}")
print(f"Test data shape: {test_images.shape}")
print(f"Pixel value range: {train_images.min()} to {train_images.max()}")

In [None]:
# Seed Python, NumPy and TensorFlow before any layer is created so the initial
# weights and the later batch shuffling are repeatable from run to run
# (GPU kernels may still introduce small non-deterministic differences).
tf.keras.utils.set_random_seed(42)  # type: ignore

# Fully connected classifier: 784 inputs -> 256 -> 128 -> 64 -> 10 outputs.
# The output layer is linear, i.e. it emits raw logits; the loss is configured
# with from_logits=True and softmax is applied explicitly at prediction time.
model = Sequential([
    tf.keras.Input(shape=(784,)),  # type: ignore
    Dense(256, activation='relu', name="L0"),
    Dense(128, activation='relu', name="L1"),
    Dense(64, activation='relu', name="L2"),
    Dense(10, activation='linear', name="OL")
], name='mnist_model')

In [None]:
# Print Keras' summary of the model (layers, output shapes, parameter counts).
model.summary()

In [None]:
# Keep handles to the first three entries of model.layers so their weights
# can be inspected.
layer1, layer2, layer3 = model.layers[:3]

In [None]:
#### Examine Weights shapes
# get_weights() is unpacked as (kernel, bias) for each layer.
W1, b1 = layer1.get_weights()
W2, b2 = layer2.get_weights()
W3, b3 = layer3.get_weights()

# Report every kernel/bias pair with its 1-based layer number.
for layer_no, (kernel, bias) in enumerate(((W1, b1), (W2, b2), (W3, b3)), start=1):
    print(f"W{layer_no} shape = {kernel.shape}, b{layer_no} shape = {bias.shape}")

In [None]:
# The network outputs raw logits, so the loss is told to apply softmax itself.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)  # type: ignore
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  # type: ignore
model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

# Training settings. The test split is passed as validation data, so the
# per-epoch validation metrics are computed on the test set.
fit_options = dict(
    epochs=100,
    batch_size=128,
    validation_data=(test_images, test_labels),
    verbose=1,
)
history = model.fit(train_images, train_labels, **fit_options)

In [None]:
# Test prediction on a single image
def display_digit(image):
    """Show a single digit as a 4x4-inch grayscale picture without axes.

    Args:
        image: Array with 784 values; it is reshaped to (28, 28) for display.
    """
    fig, ax = plt.subplots(figsize=(4, 4))
    pixels = image.reshape(28, 28)
    ax.imshow(pixels, cmap='gray')
    ax.axis('off')
    plt.show()

# Get a test image
sample_index = 1015
image_of_four = test_images[sample_index]
# Multiply by 255 to undo the earlier normalisation before displaying.
display_digit(image_of_four * 255)

# Make prediction (the model expects a batch, hence the leading axis of 1)
model_input = image_of_four.reshape(1, 784)
prediction = model.predict(model_input)

# `prediction` holds the raw model outputs; argmax picks the winning class.
print(f"Predicting digit: \n{prediction}")
print(f"Predicted digit: {np.argmax(prediction)}")
print(f"Actual label: {test_labels[sample_index]}")

In [None]:
# Show probability distribution for prediction
def show_prediction_probabilities(image, true_label, model):
    """Plot one digit beside the model's class probabilities and print a report.

    Args:
        image: Array with 784 pixel values; reshaped to (1, 784) for the model
            and to (28, 28) for display.
        true_label: Ground-truth digit, shown in the title and compared with
            the predicted digit.
        model: Object whose ``predict`` is assumed to return a single row of
            10 raw class scores; softmax is applied here to turn them into
            probabilities.

    Returns:
        None. A two-panel figure is shown and the report goes to stdout.
    """
    raw_scores = model.predict(image.reshape(1, 784), verbose=0)
    probabilities = tf.nn.softmax(raw_scores).numpy()[0]
    predicted_digit = np.argmax(probabilities)

    fig, (image_ax, bar_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: the digit itself.
    image_ax.imshow(image.reshape(28, 28), cmap='gray')
    image_ax.set_title(f'True Label: {true_label}\nPredicted: {predicted_digit}')
    image_ax.axis('off')

    # Right panel: one bar per class, in percent, with the winner in red.
    digits = np.arange(10)
    bars = bar_ax.bar(digits, probabilities * 100, color='lightblue', edgecolor='navy')
    bars[predicted_digit].set_color('red')

    bar_ax.set_xlabel('Digit')
    bar_ax.set_ylabel('Probability (%)')
    bar_ax.set_title('Prediction Probabilities')
    bar_ax.set_xticks(digits)
    bar_ax.grid(True, alpha=0.3)

    # Percentage caption just above each bar.
    for position, prob in enumerate(probabilities):
        bar_ax.text(position, prob * 100 + 1, f'{prob*100:.1f}%',
                    ha='center', va='bottom', fontsize=8)

    fig.tight_layout()
    plt.show()

    # Text report: one line per class, flagging the predicted one.
    print("Detailed probabilities:")
    for digit, prob in enumerate(probabilities):
        if digit == predicted_digit:
            marker = " ← PREDICTED"
        else:
            marker = ""
        print(f"Digit {digit}: {prob*100:5.2f}%{marker}")

    verdict = '✓' if predicted_digit == true_label else '✗'
    print(f"\nConfidence: {probabilities[predicted_digit]*100:.2f}%")
    print(f"Correct prediction: {verdict}")

# Test with a few different images
test_indices = [0, 1015, 100, 500, 999]
banner = '=' * 50
for position, idx in enumerate(test_indices, start=1):
    # Header block, then the plot + report for this test sample.
    print(f"\n{banner}")
    print(f"Test Image #{position} (Index {idx})")
    print(banner)
    show_prediction_probabilities(test_images[idx], test_labels[idx], model)

In [None]:
# Evaluate model performance on test data
print("Evaluating model on test data...")
print("=" * 50)

# Loss and accuracy as reported by Keras itself
test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=0)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")

# Class decisions for the whole test set: raw outputs -> softmax -> row-wise argmax
predictions = model.predict(test_images, verbose=0)
class_probabilities = tf.nn.softmax(predictions)
predicted_labels = np.argmax(class_probabilities, axis=1)

# Recompute the accuracy by hand as a cross-check on the Keras figure
total_samples = len(test_labels)
correct_predictions = np.sum(predicted_labels == test_labels)
manual_accuracy = correct_predictions / total_samples
print(f"Manual verification: {correct_predictions}/{total_samples} = {manual_accuracy:.4f} ({manual_accuracy*100:.2f}%)")

# Accuracy restricted to the samples whose true label is each digit in turn
print("\nPer-digit accuracy:")
print("-" * 30)
for digit in range(10):
    digit_mask = test_labels == digit
    digit_actual = test_labels[digit_mask]
    digit_count = len(digit_actual)
    digit_hits = np.sum(predicted_labels[digit_mask] == digit_actual)
    digit_accuracy = digit_hits / digit_count
    print(f"Digit {digit}: {digit_accuracy:.3f} ({digit_accuracy*100:5.1f}%) - {digit_count} samples")

# Headline totals for the whole test set
print("\nOverall Performance Summary:")
print("-" * 30)
print(f"Total test samples: {total_samples}")
print(f"Correct predictions: {correct_predictions}")
print(f"Incorrect predictions: {total_samples - correct_predictions}")
print(f"Overall accuracy: {test_accuracy*100:.2f}%")

In [None]:
# Save the trained model weights for use in canvas.py
# The path lives in one variable so the confirmation message always names the
# file that was actually written (the old message printed
# 'mnist_model_weights.h5' although the file is 'mnist_model.weights.h5').
weights_path = 'mnist_model.weights.h5'
model.save_weights(weights_path)
print(f"Model weights saved as '{weights_path}'")

# Optional: Also save the entire model
# NOTE(review): the .h5 file name is kept unchanged in case other code loads
# the model by this name; confirm before switching formats.
complete_model_path = 'mnist_complete_model.h5'
model.save(complete_model_path)
print(f"Complete model saved as '{complete_model_path}'")