In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, metrics
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

# Set style for better plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

print("✅ Libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Check if GPU is available
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
print(f"Number of GPUs: {len(tf.config.list_physical_devices('GPU'))}")


✅ Libraries imported successfully!
TensorFlow version: 2.19.0
Keras version: 3.9.2
GPU Available: []
Number of GPUs: 0


In [10]:
# Load MNIST dataset
print("📥 Loading MNIST dataset...")
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print("✅ MNIST dataset loaded!")
print(f"Training data shape: {x_train.shape}")
print(f"Test data shape: {x_test.shape}")

# Preprocess the data
print("🔄 Preprocessing data...")

# Normalize pixel values to [0, 1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Flatten images for feedforward network (28x28 -> 784)
x_train_flat = x_train.reshape(x_train.shape[0], -1)
x_test_flat = x_test.reshape(x_test.shape[0], -1)

# One-hot encode labels
y_train_onehot = to_categorical(y_train, 10)
y_test_onehot = to_categorical(y_test, 10)

print(f"Preprocessed training data shape: {x_train_flat.shape}")
print(f"Preprocessed training labels shape: {y_train_onehot.shape}")
print(f"Input dimension: {x_train_flat.shape[1]}")
print(f"Number of classes: {y_train_onehot.shape[1]}")

# Create validation split
from sklearn.model_selection import train_test_split
x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
    x_train_flat, y_train_onehot, test_size=0.1, random_state=42, stratify=y_train
)

print(f"Training set size: {x_train_split.shape[0]}")
print(f"Validation set size: {x_val_split.shape[0]}")
print(f"Test set size: {x_test_flat.shape[0]}")

print("✅ Data preprocessing completed!")


📥 Loading MNIST dataset...
✅ MNIST dataset loaded!
Training data shape: (60000, 28, 28)
Test data shape: (10000, 28, 28)
🔄 Preprocessing data...
Preprocessed training data shape: (60000, 784)
Preprocessed training labels shape: (60000, 10)
Input dimension: 784
Number of classes: 10
Training set size: 54000
Validation set size: 6000
Test set size: 10000
✅ Data preprocessing completed!


In [11]:
# Build the feedforward neural network
print("🏗️ Building the neural network...")

# Clear any previous models
tf.keras.backend.clear_session()

model = models.Sequential([
    # Hidden layer 1 (first layer with input shape)
    layers.Dense(512, activation='relu', input_shape=(784,), name='hidden_1'),
    layers.Dropout(0.2),  # Dropout for regularization
    
    # Hidden layer 2
    layers.Dense(256, activation='relu', name='hidden_2'),
    layers.Dropout(0.2),
    
    # Hidden layer 3
    layers.Dense(128, activation='relu', name='hidden_3'),
    layers.Dropout(0.1),
    
    # Output layer (10 neurons for 10 classes)
    layers.Dense(10, activation='softmax', name='output_layer')
])

# Display model architecture
print("✅ Model architecture:")
model.summary()

# Visualize model architecture (optional)
try:
    tf.keras.utils.plot_model(
        model,
        to_file='model_architecture.png',
        show_shapes=True,
        show_layer_names=True,
        rankdir='TB',
        dpi=150
    )
    print("📊 Model diagram saved as 'model_architecture.png'")
except Exception as e:
    print(f"⚠️ Could not save model diagram: {e}")

print("✅ Neural network built successfully!")


🏗️ Building the neural network...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


✅ Model architecture:


You must install pydot (`pip install pydot`) for `plot_model` to work.
✅ Neural network built successfully!


In [None]:
# Compile the model
print("⚙️ Compiling the model...")

# Import the correct metrics
from tensorflow.keras.metrics import TopKCategoricalAccuracy

model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),  # Adam optimizer
    loss='categorical_crossentropy',                 # For multi-class classification
    metrics=['accuracy', TopKCategoricalAccuracy(k=3, name='top_3_accuracy')]  # Track accuracy and top-3 accuracy
)

print("✅ Model compiled successfully!")
print("\nModel configuration:")
print(f"Optimizer: Adam (lr=0.001)")
print(f"Loss function: Categorical Crossentropy")
print(f"Metrics: Accuracy, Top-3 Accuracy")

# Define callbacks (simplified to avoid potential issues)
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1
    )
]

print("✅ Callbacks configured (simplified to avoid issues)")
print("   - Early stopping on validation loss")
print("   - Learning rate reduction on plateau")


⚙️ Compiling the model...
✅ Model compiled successfully!

Model configuration:
Optimizer: Adam (lr=0.001)
Loss function: Categorical Crossentropy
Metrics: Accuracy, Top-3 Accuracy
✅ Callbacks configured:


In [None]:
# Train the model
print("🚀 Starting training...")

# Check data shapes before training
print(f"Training data shape: {x_train_split.shape}")
print(f"Training labels shape: {y_train_split.shape}")
print(f"Validation data shape: {x_val_split.shape}")
print(f"Validation labels shape: {y_val_split.shape}")

try:
    # Try training with callbacks first
    history = model.fit(
        x_train_split, y_train_split,
        epochs=50,
        batch_size=128,
        validation_data=(x_val_split, y_val_split),
        callbacks=callbacks,
        verbose=1
    )
    print("✅ Training completed with callbacks!")
    
except Exception as e:
    print(f"⚠️ Training with callbacks failed: {e}")
    print("🔄 Trying simple training without callbacks...")
    
    # Fallback: Simple training without callbacks
    history = model.fit(
        x_train_split, y_train_split,
        epochs=20,  # Reduced epochs for safety
        batch_size=128,
        validation_data=(x_val_split, y_val_split),
        verbose=1
    )
    print("✅ Training completed (simple mode)!")

# Save the training history
try:
    training_history = pd.DataFrame(history.history)
    training_history.to_csv('training_history.csv', index=False)
    print("📊 Training history saved to 'training_history.csv'")
except Exception as e:
    print(f"⚠️ Could not save training history: {e}")


🚀 Starting training...


Epoch 1/50
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step - accuracy: 0.8424 - loss: 0.4966 - top_3_accuracy: 0.9469
Epoch 1: val_accuracy improved from -inf to 0.95483, saving model to best_model.h5




[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 133ms/step - accuracy: 0.8426 - loss: 0.4961 - top_3_accuracy: 0.9470 - val_accuracy: 0.9548 - val_loss: 0.1515 - val_top_3_accuracy: 0.9957 - learning_rate: 0.0010
Epoch 2/50


In [None]:
# Evaluate the model on test set
print("📊 Evaluating model performance...")

test_results = model.evaluate(x_test_flat, y_test_onehot, verbose=0)
test_loss = test_results[0]
test_accuracy = test_results[1]
test_top3_accuracy = test_results[2] if len(test_results) > 2 else None

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
if test_top3_accuracy is not None:
    print(f"Test Top-3 Accuracy: {test_top3_accuracy:.4f}")
else:
    print("Top-3 Accuracy: Not available")

# Make predictions
y_pred_proba = model.predict(x_test_flat, verbose=0)
y_pred = np.argmax(y_pred_proba, axis=1)

# Generate classification report
print("\n📋 Classification Report:")
print(classification_report(y_test, y_pred))


In [None]:
# Plot training history
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Training & Validation Loss
axes[0, 0].plot(history.history['loss'], label='Training Loss', linewidth=2)
axes[0, 0].plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
axes[0, 0].set_title('Model Loss', fontsize=14, fontweight='bold')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Training & Validation Accuracy
axes[0, 1].plot(history.history['accuracy'], label='Training Accuracy', linewidth=2)
axes[0, 1].plot(history.history['val_accuracy'], label='Validation Accuracy', linewidth=2)
axes[0, 1].set_title('Model Accuracy', fontsize=14, fontweight='bold')
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Accuracy')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=range(10), yticklabels=range(10), ax=axes[1, 0])
axes[1, 0].set_title('Confusion Matrix', fontsize=14, fontweight='bold')
axes[1, 0].set_xlabel('Predicted Label')
axes[1, 0].set_ylabel('True Label')

# Per-class accuracy
class_accuracy = cm.diagonal() / cm.sum(axis=1)
axes[1, 1].bar(range(10), class_accuracy, color='skyblue', edgecolor='navy', alpha=0.7)
axes[1, 1].set_title('Per-Class Accuracy', fontsize=14, fontweight='bold')
axes[1, 1].set_xlabel('Digit Class')
axes[1, 1].set_ylabel('Accuracy')
axes[1, 1].set_xticks(range(10))
axes[1, 1].grid(True, alpha=0.3)
for i, acc in enumerate(class_accuracy):
    axes[1, 1].text(i, acc + 0.01, f'{acc:.3f}', ha='center', fontsize=9)

plt.tight_layout()
plt.show()

print("✅ Training history and evaluation metrics visualized!")


In [None]:
# Visualize predictions vs actual labels
def show_predictions(n_samples=16):
    """Display predictions vs actual labels for sample images"""
    indices = np.random.choice(len(x_test), n_samples, replace=False)
    
    fig, axes = plt.subplots(4, 4, figsize=(12, 12))
    fig.suptitle('Predictions vs Actual Labels', fontsize=16, fontweight='bold')
    
    for i, idx in enumerate(indices):
        row = i // 4
        col = i % 4
        
        # Get prediction
        pred_proba = y_pred_proba[idx]
        pred_label = np.argmax(pred_proba)
        actual_label = y_test[idx]
        confidence = pred_proba[pred_label]
        
        # Plot image
        axes[row, col].imshow(x_test[idx], cmap='gray')
        
        # Set title with prediction info
        color = 'green' if pred_label == actual_label else 'red'
        axes[row, col].set_title(
            f'Actual: {actual_label}\nPred: {pred_label} ({confidence:.2f})',
            color=color, fontsize=10
        )
        axes[row, col].axis('off')
    
    plt.tight_layout()
    plt.show()

show_predictions()

# Show some misclassified examples
def show_misclassified(n_samples=12):
    """Display misclassified examples"""
    misclassified_indices = np.where(y_pred != y_test)[0]
    indices = np.random.choice(misclassified_indices, n_samples, replace=False)
    
    fig, axes = plt.subplots(3, 4, figsize=(12, 9))
    fig.suptitle('Misclassified Examples', fontsize=16, fontweight='bold')
    
    for i, idx in enumerate(indices):
        row = i // 4
        col = i % 4
        
        pred_proba = y_pred_proba[idx]
        pred_label = np.argmax(pred_proba)
        actual_label = y_test[idx]
        confidence = pred_proba[pred_label]
        
        axes[row, col].imshow(x_test[idx], cmap='gray')
        axes[row, col].set_title(
            f'True: {actual_label} | Pred: {pred_label}\nConf: {confidence:.2f}',
            color='red', fontsize=10
        )
        axes[row, col].axis('off')
    
    plt.tight_layout()
    plt.show()

if len(np.where(y_pred != y_test)[0]) > 0:
    show_misclassified()
else:
    print("🎉 No misclassifications found!")

print("✅ Prediction visualization completed!")


In [None]:
# Advanced model analysis
print("🔬 Performing advanced model analysis...")

# 1. Analyze prediction confidence
confidence_scores = np.max(y_pred_proba, axis=1)
predicted_classes = np.argmax(y_pred_proba, axis=1)

plt.figure(figsize=(15, 5))

# Confidence distribution
plt.subplot(1, 3, 1)
plt.hist(confidence_scores, bins=50, alpha=0.7, edgecolor='black', color='skyblue')
plt.title('Prediction Confidence Distribution')
plt.xlabel('Confidence Score')
plt.ylabel('Number of Predictions')
plt.grid(True, alpha=0.3)

# Confidence by class
plt.subplot(1, 3, 2)
class_confidence = [confidence_scores[predicted_classes == i] for i in range(10)]
plt.boxplot(class_confidence, labels=range(10))
plt.title('Confidence by Predicted Class')
plt.xlabel('Predicted Class')
plt.ylabel('Confidence Score')
plt.grid(True, alpha=0.3)

# Accuracy vs Confidence
plt.subplot(1, 3, 3)
correct_predictions = (predicted_classes == y_test)
confidence_bins = np.linspace(0, 1, 11)
bin_centers = (confidence_bins[:-1] + confidence_bins[1:]) / 2
bin_accuracy = []

for i in range(len(confidence_bins)-1):
    mask = (confidence_scores >= confidence_bins[i]) & (confidence_scores < confidence_bins[i+1])
    if np.sum(mask) > 0:
        bin_accuracy.append(np.mean(correct_predictions[mask]))
    else:
        bin_accuracy.append(0)

plt.plot(bin_centers, bin_accuracy, 'o-', linewidth=2, markersize=8)
plt.plot([0, 1], [0, 1], 'r--', alpha=0.7, label='Perfect Calibration')
plt.title('Reliability Diagram')
plt.xlabel('Confidence Score')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 2. Analyze model weights and activations
print("\n🧠 Analyzing model internals...")

# Get weights from first layer (input -> hidden)
first_layer_weights = model.layers[0].get_weights()[0]  # Shape: (784, 512)

# Visualize some learned features
n_features_to_show = 16
feature_indices = np.random.choice(first_layer_weights.shape[1], n_features_to_show, replace=False)

fig, axes = plt.subplots(4, 4, figsize=(12, 12))
fig.suptitle('Learned Features (First Layer Weights)', fontsize=16, fontweight='bold')

for i, feature_idx in enumerate(feature_indices):
    row = i // 4
    col = i % 4
    
    # Reshape weight vector to 28x28 image
    feature_map = first_layer_weights[:, feature_idx].reshape(28, 28)
    
    # Normalize for visualization
    feature_map = (feature_map - feature_map.min()) / (feature_map.max() - feature_map.min())
    
    axes[row, col].imshow(feature_map, cmap='RdBu', interpolation='nearest')
    axes[row, col].set_title(f'Feature {feature_idx}', fontsize=10)
    axes[row, col].axis('off')

plt.tight_layout()
plt.show()

# 3. Training metrics summary
print("\n📊 Training Summary:")
final_train_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_train_acc = history.history['accuracy'][-1]
final_val_acc = history.history['val_accuracy'][-1]

print(f"Final Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Final Training Accuracy: {final_train_acc:.4f}")
print(f"Final Validation Accuracy: {final_val_acc:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Generalization Gap: {final_train_acc - test_accuracy:.4f}")

# 4. Model efficiency metrics
total_params = model.count_params()
trainable_params = sum([np.prod(layer.get_weights()[0].shape) for layer in model.layers if layer.get_weights()])

print(f"\n🔧 Model Efficiency:")
print(f"Total Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}")
print(f"Model Size (approx): {total_params * 4 / (1024*1024):.2f} MB")  # Assuming float32
print(f"Parameters per accuracy point: {total_params / test_accuracy:.0f}")

print("✅ Advanced model analysis completed!")


In [None]:
# ALTERNATIVE: Simple compilation (uncomment if needed)
"""
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print("✅ Model compiled with basic metrics")
"""

# Calculate top-3 accuracy manually if needed
def calculate_top_k_accuracy(y_true, y_pred, k=3):
    """Calculate top-k accuracy manually"""
    y_true_labels = np.argmax(y_true, axis=1)
    top_k_pred = np.argsort(y_pred, axis=1)[:, -k:]
    
    correct = 0
    for i, true_label in enumerate(y_true_labels):
        if true_label in top_k_pred[i]:
            correct += 1
    
    return correct / len(y_true_labels)

print("📝 Alternative compilation method and manual top-k calculation ready if needed")
