In [8]:
# Import libraries 
import tensorflow as tf 
from tensorflow.keras import layers, models 
import numpy as np 
import matplotlib.pyplot as plt

# Fetch MNIST: 28x28 grayscale digit images with integer labels, already
# split into a training set and a test set.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Rescale pixel intensities from [0, 255] to [0, 1]; this helps the model
# train faster and more stably.
train_images, test_images = train_images / 255.0, test_images / 255.0

# Sanity-check the array shapes.
print("Training images shape:", train_images.shape)  # (60000, 28, 28)
print("Training labels shape:", train_labels.shape)  # (60000,)
print("Test images shape:", test_images.shape)  # (10000, 28, 28)

# Preview the first training digit together with its label and an
# intensity scale.
fig, ax = plt.subplots(figsize=(5, 5))
digit = ax.imshow(train_images[0], cmap='gray')
ax.set_title(f"Label: {train_labels[0]}")
fig.colorbar(digit, ax=ax)
plt.show()

# Build a small fully connected classifier as a Sequential model
# (a linear stack of layers).
model = models.Sequential()
# Declare the input explicitly: each sample is one 28x28 image.
# An Input object is the recommended way to give a Sequential model its input
# shape; passing `input_shape=` to the first layer is discouraged and triggers
# a warning in recent Keras releases.
model.add(layers.Input(shape=(28, 28)))
# Flatten the 28x28 image into a 1D vector of 784 pixels.
model.add(layers.Flatten())
# First hidden layer: 128 neurons, ReLU activation.
# A Dense layer means every neuron is connected to every neuron in the
# previous layer.
model.add(layers.Dense(128, activation='relu'))
# Second hidden layer: 64 neurons, ReLU activation.
model.add(layers.Dense(64, activation='relu'))
# Output layer: 10 neurons (one for each digit 0-9), softmax activation.
# Softmax converts the 10 outputs into a probability distribution
# (they sum to 1).
model.add(layers.Dense(10, activation='softmax'))
# Print the layer-by-layer architecture and parameter counts.
model.summary()
# Exercise: analyze the model.summary() output and note the number of parameters.
# How are they calculated? For example, the first Dense layer has
# (784 inputs x 128 neurons) + 128 biases = 100,480 parameters.

# Configure the training procedure: which optimizer updates the weights,
# which loss is minimized, and which metrics are reported.
model.compile(
    optimizer='adam',  # A sophisticated and very popular version of Gradient Descent
    loss='sparse_categorical_crossentropy',  # Use this for integer labels (0, 1, 2...)
    metrics=['accuracy'],  # We want to track accuracy during training
)

# Fit the network for 5 epochs, i.e. 5 complete passes over the training set.
# After every epoch the loss and metrics are also computed on `validation_data`.
# Note that the test split is what gets passed as validation data here.
history = model.fit(
    x=train_images,
    y=train_labels,
    epochs=5,
    validation_data=(test_images, test_labels),
)

# Draw the learning curves side by side: accuracy on the left, loss on the
# right, each with its training and validation series per epoch.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim([0.9, 1])  # only the 0.9-1.0 band is shown
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()

# Score the trained model on the full test set and report its accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")

# Run the model over the test images; each row of the result holds the
# 10 class probabilities for one image.
predictions = model.predict(test_images)

# Look at the first test image: its ground-truth label and the model's output.
true_class = test_labels[0]
first_image_prediction = predictions[0]
# The predicted digit is the index of the largest probability.
predicted_class = first_image_prediction.argmax()

print(f"Model's predicted probabilities: {first_image_prediction}")
print(f"Predicted class: {predicted_class}, True class: {true_class}")
# Report whether the prediction matches the true label.
print(f"Prediction is correct: {predicted_class == true_class}")



ModuleNotFoundError: No module named 'tensorflow'