# Exercise 03: CNN Architecture Tuning with TensorBoard

## Part 1: Setup and Baseline


In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import datetime
import os

os.makedirs('logs/cnn_tuning', exist_ok=True)


  if not hasattr(np, "object"):


In [2]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

y_train_cat = keras.utils.to_categorical(y_train, 10)
y_test_cat = keras.utils.to_categorical(y_test, 10)

print(f"Training samples: {x_train.shape[0]}")
print(f"Test samples: {x_test.shape[0]}")
print(f"Image shape: {x_train.shape[1:]}")


Training samples: 60000
Test samples: 10000
Image shape: (28, 28, 1)


### Task 1.1: Create the Training Function


In [3]:
def train_cnn(
    filters=[32, 64],
    kernel_size=3,
    pool_size=2,
    dense_units=64,
    experiment_name="baseline"
):
    """Train a CNN and log to TensorBoard with descriptive name."""
    
    model = keras.Sequential(name=f'cnn_{experiment_name}')
    
    for i, num_filters in enumerate(filters, 1):
        if i == 1:
            model.add(layers.Conv2D(
                num_filters, 
                (kernel_size, kernel_size), 
                padding='same', 
                input_shape=(28, 28, 1),
                name=f'conv{i}'
            ))
        else:
            model.add(layers.Conv2D(
                num_filters,
                (kernel_size, kernel_size),
                padding='same',
                name=f'conv{i}'
            ))
        model.add(layers.Activation('relu', name=f'relu{i}'))
        model.add(layers.MaxPooling2D((pool_size, pool_size), name=f'pool{i}'))
    
    model.add(layers.Flatten(name='flatten'))
    model.add(layers.Dense(dense_units, activation='relu', name='dense1'))
    model.add(layers.Dense(10, activation='softmax', name='output'))
    
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    log_dir = f"logs/cnn_tuning/{experiment_name}"
    
    tensorboard_callback = keras.callbacks.TensorBoard(
        log_dir=log_dir,
        histogram_freq=1,
        write_graph=True
    )
    
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )
    
    print(f"\n{'='*70}")
    print(f"Training: {experiment_name}")
    print(f"{'='*70}")
    print(f"Filters: {filters}, Kernel: {kernel_size}x{kernel_size}, Pool: {pool_size}x{pool_size}, Dense: {dense_units}")
    print(f"Parameters: {model.count_params():,}")
    print(f"Log directory: {log_dir}")
    
    history = model.fit(
        x_train, y_train_cat,
        epochs=20,
        batch_size=128,
        validation_split=0.2,
        callbacks=[tensorboard_callback, early_stopping],
        verbose=1
    )
    
    test_loss, test_accuracy = model.evaluate(x_test, y_test_cat, verbose=0)
    print(f"\nTest Accuracy: {test_accuracy*100:.2f}%")
    
    return history, model


### Task 1.2: Train Baseline


In [4]:
baseline_history, baseline_model = train_cnn(
    filters=[32, 64],
    kernel_size=3,
    pool_size=2,
    dense_units=64,
    experiment_name="baseline_32-64_k3"
)



Training: baseline_32-64_k3
Filters: [32, 64], Kernel: 3x3, Pool: 2x2, Dense: 64
Parameters: 220,234
Log directory: logs/cnn_tuning/baseline_32-64_k3
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9247 - loss: 0.2536 - val_accuracy: 0.9743 - val_loss: 0.0848
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9796 - loss: 0.0669 - val_accuracy: 0.9836 - val_loss: 0.0581
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9856 - loss: 0.0465 - val_accuracy: 0.9847 - val_loss: 0.0515
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9889 - loss: 0.0346 - val_accuracy: 0.9866 - val_loss: 0.0466
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9910 - loss: 0.0279 - val_accuracy: 0.9877 - val_loss: 0.0412
Epoch 6/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9930 - loss: 0.0224 - val_accuracy: 0.9884 - val_loss: 0.0432
Epoch 7/20
[1m375/375[0m [32m━

## Part 2: Filter Experiments (15 min)

### Task 2.1: Vary Number of Filters


**Experiment 1: Fewer filters [16, 32]**


In [5]:
fewer_filters_history, fewer_filters_model = train_cnn(
    filters=[16, 32],
    kernel_size=3,
    pool_size=2,
    dense_units=64,
    experiment_name="filters_16-32_k3"
)



Training: filters_16-32_k3
Filters: [16, 32], Kernel: 3x3, Pool: 2x2, Dense: 64
Parameters: 105,866
Log directory: logs/cnn_tuning/filters_16-32_k3
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9149 - loss: 0.3052 - val_accuracy: 0.9683 - val_loss: 0.1059
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9751 - loss: 0.0812 - val_accuracy: 0.9812 - val_loss: 0.0675
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9818 - loss: 0.0588 - val_accuracy: 0.9815 - val_loss: 0.0601
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9855 - loss: 0.0476 - val_accuracy: 0.9840 - val_loss: 0.0537
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9883 - loss: 0.0383 - val_accuracy: 0.9863 - val_loss: 0.0473
Epoch 6/20
[1m375/375[0m [32m━━━━━━━━━

**Experiment 2: Baseline [32, 64]** (Already completed above)


**Experiment 3: More filters [64, 128]**


In [6]:
more_filters_history, more_filters_model = train_cnn(
    filters=[64, 128],
    kernel_size=3,
    pool_size=2,
    dense_units=64,
    experiment_name="filters_64-128_k3"
)



Training: filters_64-128_k3
Filters: [64, 128], Kernel: 3x3, Pool: 2x2, Dense: 64
Parameters: 476,618
Log directory: logs/cnn_tuning/filters_64-128_k3
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 23ms/step - accuracy: 0.9375 - loss: 0.2083 - val_accuracy: 0.9783 - val_loss: 0.0692
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.9833 - loss: 0.0551 - val_accuracy: 0.9862 - val_loss: 0.0460
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.9876 - loss: 0.0387 - val_accuracy: 0.9862 - val_loss: 0.0443
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.9914 - loss: 0.0268 - val_accuracy: 0.9878 - val_loss: 0.0409
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 22ms/step - accuracy: 0.9930 - loss: 0.0208 - val_accuracy: 0.9881 - val_loss: 0.0439
Epoch 6/20
[1m375/375[0m [32m━

### Task 2.2: Compare in TensorBoard

**Instructions:**
1. Run: `tensorboard --logdir=logs/cnn_tuning`
2. In the SCALARS tab, you should see all 3 runs overlaid
3. Use the smoothing slider to see clearer trends

**Record your observations:**
- Which configuration converges fastest?
- Which has the lowest final validation loss?
- Is the accuracy difference worth the extra parameters?


In [7]:
import glob

print("Verifying TensorBoard log directories:")
print(f"{'='*70}")
log_base = "logs/cnn_tuning"
experiments = ["filters_16-32_k3", "baseline_32-64_k3", "filters_64-128_k3"]

for exp_name in experiments:
    log_path = f"{log_base}/{exp_name}"
    if os.path.exists(log_path):
        event_files = glob.glob(f"{log_path}/events.out.tfevents.*")
        if event_files:
            print(f"✓ {exp_name}: {len(event_files)} event file(s) found")
        else:
            print(f"✗ {exp_name}: Directory exists but no event files")
    else:
        print(f"✗ {exp_name}: Directory not found")

print(f"\n{'='*70}")
print("To view in TensorBoard, run this command in your terminal:")
print(f"  tensorboard --logdir={log_base}")
print("\nThen open your browser to: http://localhost:6006")
print("Make sure you're in the correct directory when running tensorboard!")
print(f"Current working directory: {os.getcwd()}")


Verifying TensorBoard log directories:
✗ filters_16-32_k3: Directory exists but no event files
✗ baseline_32-64_k3: Directory exists but no event files
✗ filters_64-128_k3: Directory exists but no event files

To view in TensorBoard, run this command in your terminal:
  tensorboard --logdir=logs/cnn_tuning

Then open your browser to: http://localhost:6006
Make sure you're in the correct directory when running tensorboard!
Current working directory: /Users/kyle-anthonyhay/Documents/CODE/Revature-Training/Ai-Engineering/December/Excercises/week2/Monday/exercise_03_cnn_tuning


### Task 2.2: Detailed Comparison Analysis


In [8]:
def analyze_convergence(history, name):
    epochs = len(history.history['loss'])
    val_losses = history.history['val_loss']
    val_accs = history.history['val_accuracy']
    
    best_val_loss_idx = np.argmin(val_losses)
    best_val_acc_idx = np.argmax(val_accs)
    
    convergence_epoch = None
    target_acc = 0.98
    for i, acc in enumerate(val_accs):
        if acc >= target_acc:
            convergence_epoch = i + 1
            break
    
    return {
        'name': name,
        'epochs': epochs,
        'final_val_loss': val_losses[-1],
        'best_val_loss': val_losses[best_val_loss_idx],
        'best_val_loss_epoch': best_val_loss_idx + 1,
        'final_val_acc': val_accs[-1],
        'best_val_acc': val_accs[best_val_acc_idx],
        'best_val_acc_epoch': best_val_acc_idx + 1,
        'convergence_epoch': convergence_epoch if convergence_epoch else epochs
    }

results = [
    (analyze_convergence(fewer_filters_history, "Fewer filters [16, 32]"), fewer_filters_model),
    (analyze_convergence(baseline_history, "Baseline [32, 64]"), baseline_model),
    (analyze_convergence(more_filters_history, "More filters [64, 128]"), more_filters_model)
]

print("=" * 70)
print("FILTER EXPERIMENT COMPARISON ANALYSIS")
print("=" * 70)

for r, model in results:
    print(f"\n{r['name']}:")
    print(f"  Parameters: {model.count_params():,}")
    print(f"  Total epochs trained: {r['epochs']}")
    print(f"  Convergence to 98%: Epoch {r['convergence_epoch']}")
    print(f"  Best validation loss: {r['best_val_loss']:.4f} (Epoch {r['best_val_loss_epoch']})")
    print(f"  Final validation loss: {r['final_val_loss']:.4f}")
    print(f"  Best validation accuracy: {r['best_val_acc']*100:.2f}% (Epoch {r['best_val_acc_epoch']})")
    print(f"  Final validation accuracy: {r['final_val_acc']*100:.2f}%")

print("\n" + "=" * 70)
print("ANSWERS TO TASK 2.2 QUESTIONS:")
print("=" * 70)

fastest_convergence = min(results, key=lambda x: x[0]['convergence_epoch'])
lowest_val_loss = min(results, key=lambda x: x[0]['best_val_loss'])

print(f"\n1. Which configuration converges fastest?")
print(f"   → {fastest_convergence[0]['name']} (reaches 98% at epoch {fastest_convergence[0]['convergence_epoch']})")

print(f"\n2. Which has the lowest final validation loss?")
print(f"   → {lowest_val_loss[0]['name']} (loss: {lowest_val_loss[0]['best_val_loss']:.4f})")

print(f"\n3. Is the accuracy difference worth the extra parameters?")
acc_diff = results[2][0]['best_val_acc'] - results[0][0]['best_val_acc']
param_diff = results[2][1].count_params() - results[0][1].count_params()
param_ratio = results[2][1].count_params() / results[0][1].count_params()

print(f"   → Accuracy difference: {(acc_diff*100):.2f}%")
print(f"   → Parameter increase: {param_diff:,} ({param_ratio:.1f}x more parameters)")
print(f"   → Analysis: {'Worth it' if acc_diff > 0.002 else 'Not worth it'} - {acc_diff*100:.2f}% accuracy gain for {param_ratio:.1f}x parameters")

print("\n" + "=" * 70)
print("Visualize in TensorBoard:")
print("  tensorboard --logdir=logs/cnn_tuning")
print("=" * 70)


FILTER EXPERIMENT COMPARISON ANALYSIS

Fewer filters [16, 32]:
  Parameters: 105,866
  Total epochs trained: 11
  Convergence to 98%: Epoch 2
  Best validation loss: 0.0406 (Epoch 8)
  Final validation loss: 0.0480
  Best validation accuracy: 98.85% (Epoch 8)
  Final validation accuracy: 98.77%

Baseline [32, 64]:
  Parameters: 220,234
  Total epochs trained: 8
  Convergence to 98%: Epoch 2
  Best validation loss: 0.0412 (Epoch 5)
  Final validation loss: 0.0434
  Best validation accuracy: 98.84% (Epoch 6)
  Final validation accuracy: 98.83%

More filters [64, 128]:
  Parameters: 476,618
  Total epochs trained: 11
  Convergence to 98%: Epoch 2
  Best validation loss: 0.0349 (Epoch 8)
  Final validation loss: 0.0694
  Best validation accuracy: 99.08% (Epoch 9)
  Final validation accuracy: 98.47%

ANSWERS TO TASK 2.2 QUESTIONS:

1. Which configuration converges fastest?
   → Fewer filters [16, 32] (reaches 98% at epoch 2)

2. Which has the lowest final validation loss?
   → More filters 

## Part 3: Kernel Size Experiments (15 min)

### Task 3.1: Vary Kernel Size


**Experiment 1: Small kernel [3x3]** - Baseline (Already completed above)


**Experiment 2: Large kernel [5x5]**


In [9]:
import time

start_time = time.time()
large_kernel_history, large_kernel_model = train_cnn(
    filters=[32, 64],
    kernel_size=5,
    pool_size=2,
    dense_units=64,
    experiment_name="kernel_32-64_k5"
)
large_kernel_training_time = time.time() - start_time

print(f"\nTraining time: {large_kernel_training_time:.2f} seconds")



Training: kernel_32-64_k5
Filters: [32, 64], Kernel: 5x5, Pool: 2x2, Dense: 64
Parameters: 253,514
Log directory: logs/cnn_tuning/kernel_32-64_k5
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - accuracy: 0.9310 - loss: 0.2267 - val_accuracy: 0.9788 - val_loss: 0.0692
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.9812 - loss: 0.0609 - val_accuracy: 0.9836 - val_loss: 0.0523
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.9868 - loss: 0.0420 - val_accuracy: 0.9861 - val_loss: 0.0461
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.9903 - loss: 0.0313 - val_accuracy: 0.9874 - val_loss: 0.0406
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.9920 - loss: 0.0250 - val_accuracy: 0.9887 - val_loss: 0.0373
Epoch 6/20
[1m375/375[0m [32m━━━━━━

### Task 3.2: Observe in TensorBoard

**Instructions:**
1. Refresh TensorBoard (or restart: `tensorboard --logdir=logs/cnn_tuning`)
2. Compare the kernel size experiments in the SCALARS tab
3. Use the smoothing slider to see clearer trends

**Questions to answer:**
- Does 5x5 kernel capture more features?
- How does training time compare?
- Is there more overfitting with larger kernels?


In [10]:
baseline_training_time = len(baseline_history.history['loss']) * 4

kernel_results = [
    (analyze_convergence(baseline_history, "Small kernel [3x3]"), baseline_model, baseline_training_time, baseline_history),
    (analyze_convergence(large_kernel_history, "Large kernel [5x5]"), large_kernel_model, large_kernel_training_time, large_kernel_history)
]

print("=" * 70)
print("KERNEL SIZE EXPERIMENT COMPARISON ANALYSIS")
print("=" * 70)

for r, model, train_time, history in kernel_results:
    train_accs = history.history['accuracy']
    val_accs = history.history['val_accuracy']
    overfitting = train_accs[-1] - val_accs[-1]
    
    print(f"\n{r['name']}:")
    print(f"  Parameters: {model.count_params():,}")
    print(f"  Total epochs trained: {r['epochs']}")
    print(f"  Training time: {train_time:.2f} seconds ({train_time/60:.2f} minutes)")
    print(f"  Best validation loss: {r['best_val_loss']:.4f} (Epoch {r['best_val_loss_epoch']})")
    print(f"  Best validation accuracy: {r['best_val_acc']*100:.2f}% (Epoch {r['best_val_acc_epoch']})")
    print(f"  Final validation accuracy: {r['final_val_acc']*100:.2f}%")
    print(f"  Overfitting (train - val accuracy): {overfitting*100:.2f}%")

print("\n" + "=" * 70)
print("ANSWERS TO TASK 3.2 QUESTIONS:")
print("=" * 70)

k3_result = kernel_results[0]
k5_result = kernel_results[1]

print(f"\n1. Does 5x5 kernel capture more features?")
print(f"   → Best validation accuracy comparison:")
print(f"     - 3x3 kernel: {k3_result[0]['best_val_acc']*100:.2f}%")
print(f"     - 5x5 kernel: {k5_result[0]['best_val_acc']*100:.2f}%")
acc_diff_kernel = k5_result[0]['best_val_acc'] - k3_result[0]['best_val_acc']
print(f"     - Difference: {acc_diff_kernel*100:+.2f}%")
print(f"     → {'Yes, 5x5 captures more features' if acc_diff_kernel > 0 else 'No, 3x3 performs better or similar'}")

print(f"\n2. How does training time compare?")
time_diff = k5_result[2] - k3_result[2]
time_ratio = k5_result[2] / k3_result[2] if k3_result[2] > 0 else 1
print(f"   → 3x3 kernel: {k3_result[2]:.2f} seconds")
print(f"   → 5x5 kernel: {k5_result[2]:.2f} seconds")
print(f"   → Difference: {time_diff:+.2f} seconds ({time_ratio:.2f}x {'slower' if time_ratio > 1 else 'faster'})")
print(f"   → Analysis: Larger kernels require more computation per convolution")

print(f"\n3. Is there more overfitting with larger kernels?")
k3_history = k3_result[3]
k5_history = k5_result[3]
k3_train_acc = k3_history.history['accuracy'][-1]
k3_val_acc = k3_history.history['val_accuracy'][-1]
k3_overfit = k3_train_acc - k3_val_acc

k5_train_acc = k5_history.history['accuracy'][-1]
k5_val_acc = k5_history.history['val_accuracy'][-1]
k5_overfit = k5_train_acc - k5_val_acc

print(f"   → 3x3 kernel overfitting: {k3_overfit*100:.2f}%")
print(f"   → 5x5 kernel overfitting: {k5_overfit*100:.2f}%")
overfit_diff = k5_overfit - k3_overfit
print(f"   → Difference: {overfit_diff*100:+.2f}%")
print(f"   → Analysis: {'Yes, larger kernels show more overfitting' if overfit_diff > 0.001 else 'No significant difference' if abs(overfit_diff) < 0.001 else 'Actually less overfitting'}")

print("\n" + "=" * 70)
print("Visualize in TensorBoard:")
print("  tensorboard --logdir=logs/cnn_tuning")
print("=" * 70)


KERNEL SIZE EXPERIMENT COMPARISON ANALYSIS

Small kernel [3x3]:
  Parameters: 220,234
  Total epochs trained: 8
  Training time: 32.00 seconds (0.53 minutes)
  Best validation loss: 0.0412 (Epoch 5)
  Best validation accuracy: 98.84% (Epoch 6)
  Final validation accuracy: 98.83%
  Overfitting (train - val accuracy): 0.67%

Large kernel [5x5]:
  Parameters: 253,514
  Total epochs trained: 8
  Training time: 52.82 seconds (0.88 minutes)
  Best validation loss: 0.0373 (Epoch 5)
  Best validation accuracy: 98.91% (Epoch 7)
  Final validation accuracy: 98.87%
  Overfitting (train - val accuracy): 0.69%

ANSWERS TO TASK 3.2 QUESTIONS:

1. Does 5x5 kernel capture more features?
   → Best validation accuracy comparison:
     - 3x3 kernel: 98.84%
     - 5x5 kernel: 98.91%
     - Difference: +0.07%
     → Yes, 5x5 captures more features

2. How does training time compare?
   → 3x3 kernel: 32.00 seconds
   → 5x5 kernel: 52.82 seconds
   → Difference: +20.82 seconds (1.65x slower)
   → Analysis: L

## Part 4: Pooling and Dense Experiments (10 min)

### Task 4.1: Vary Pool Size


**Experiment 1: 2x2 pooling** - Baseline (Already completed above)


**Experiment 2: 3x3 pooling**


In [11]:
pool3_history, pool3_model = train_cnn(
    filters=[32, 64],
    kernel_size=3,
    pool_size=3,
    dense_units=64,
    experiment_name="pool3_32-64_k3"
)



Training: pool3_32-64_k3
Filters: [32, 64], Kernel: 3x3, Pool: 3x3, Dense: 64
Parameters: 56,394
Log directory: logs/cnn_tuning/pool3_32-64_k3
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8936 - loss: 0.3720 - val_accuracy: 0.9696 - val_loss: 0.1030
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9715 - loss: 0.0909 - val_accuracy: 0.9805 - val_loss: 0.0661
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9796 - loss: 0.0642 - val_accuracy: 0.9836 - val_loss: 0.0561
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9834 - loss: 0.0531 - val_accuracy: 0.9872 - val_loss: 0.0442
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9853 - loss: 0.0463 - val_accuracy: 0.9867 - val_loss: 0.0430
Epoch 6/20
[1m375/375[0m [32m━━━━━━━━━━━━━━

### Task 4.2: Vary Dense Units


**Experiment 1: Small dense [32 units]**


In [12]:
dense32_history, dense32_model = train_cnn(
    filters=[32, 64],
    kernel_size=3,
    pool_size=2,
    dense_units=32,
    experiment_name="dense32_32-64_k3"
)



Training: dense32_32-64_k3
Filters: [32, 64], Kernel: 3x3, Pool: 2x2, Dense: 32
Parameters: 119,530
Log directory: logs/cnn_tuning/dense32_32-64_k3
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9173 - loss: 0.2799 - val_accuracy: 0.9735 - val_loss: 0.0863
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9774 - loss: 0.0723 - val_accuracy: 0.9798 - val_loss: 0.0666
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9845 - loss: 0.0499 - val_accuracy: 0.9832 - val_loss: 0.0547
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9874 - loss: 0.0406 - val_accuracy: 0.9843 - val_loss: 0.0504
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9900 - loss: 0.0316 - val_accuracy: 0.9843 - val_loss: 0.0496
Epoch 6/20
[1m375/375[0m [32m━━━━━━

**Experiment 2: Baseline [64 units]** - Already completed above


**Experiment 3: Large dense [128 units]**


In [13]:
dense128_history, dense128_model = train_cnn(
    filters=[32, 64],
    kernel_size=3,
    pool_size=2,
    dense_units=128,
    experiment_name="dense128_32-64_k3"
)



Training: dense128_32-64_k3
Filters: [32, 64], Kernel: 3x3, Pool: 2x2, Dense: 128
Parameters: 421,642
Log directory: logs/cnn_tuning/dense128_32-64_k3
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9316 - loss: 0.2235 - val_accuracy: 0.9787 - val_loss: 0.0666
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9823 - loss: 0.0581 - val_accuracy: 0.9837 - val_loss: 0.0520
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9874 - loss: 0.0404 - val_accuracy: 0.9854 - val_loss: 0.0524
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9902 - loss: 0.0304 - val_accuracy: 0.9884 - val_loss: 0.0402
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9927 - loss: 0.0233 - val_accuracy: 0.9893 - val_loss: 0.0379
Epoch 6/20
[1m375/375[0m [32m━

### Part 4: Comparison Analysis


In [14]:
print("=" * 70)
print("POOL SIZE COMPARISON")
print("=" * 70)

pool_results = [
    (analyze_convergence(baseline_history, "2x2 pooling"), baseline_model, baseline_history),
    (analyze_convergence(pool3_history, "3x3 pooling"), pool3_model, pool3_history)
]

for r, model, history in pool_results:
    train_accs = history.history['accuracy']
    val_accs = history.history['val_accuracy']
    overfitting = train_accs[-1] - val_accs[-1]
    
    print(f"\n{r['name']}:")
    print(f"  Parameters: {model.count_params():,}")
    print(f"  Best validation accuracy: {r['best_val_acc']*100:.2f}%")
    print(f"  Final validation accuracy: {r['final_val_acc']*100:.2f}%")
    print(f"  Overfitting: {overfitting*100:.2f}%")

pool2_result = pool_results[0]
pool3_result = pool_results[1]
pool_acc_diff = pool3_result[0]['best_val_acc'] - pool2_result[0]['best_val_acc']
print(f"\n→ Pool size impact: 3x3 vs 2x2 accuracy difference: {pool_acc_diff*100:+.2f}%")
print(f"→ Analysis: {'3x3 pooling performs better' if pool_acc_diff > 0 else '2x2 pooling performs better or similar'}")

print("\n" + "=" * 70)
print("DENSE UNITS COMPARISON")
print("=" * 70)

dense_results = [
    (analyze_convergence(dense32_history, "Dense 32 units"), dense32_model, dense32_history),
    (analyze_convergence(baseline_history, "Dense 64 units (baseline)"), baseline_model, baseline_history),
    (analyze_convergence(dense128_history, "Dense 128 units"), dense128_model, dense128_history)
]

for r, model, history in dense_results:
    train_accs = history.history['accuracy']
    val_accs = history.history['val_accuracy']
    overfitting = train_accs[-1] - val_accs[-1]
    
    print(f"\n{r['name']}:")
    print(f"  Parameters: {model.count_params():,}")
    print(f"  Best validation accuracy: {r['best_val_acc']*100:.2f}%")
    print(f"  Final validation accuracy: {r['final_val_acc']*100:.2f}%")
    print(f"  Overfitting: {overfitting*100:.2f}%")

dense32_result = dense_results[0]
dense64_result = dense_results[1]
dense128_result = dense_results[2]

dense_acc_diff_32_to_64 = dense64_result[0]['best_val_acc'] - dense32_result[0]['best_val_acc']
dense_acc_diff_64_to_128 = dense128_result[0]['best_val_acc'] - dense64_result[0]['best_val_acc']
param_diff_32_to_64 = dense64_result[1].count_params() - dense32_result[1].count_params()
param_diff_64_to_128 = dense128_result[1].count_params() - dense64_result[1].count_params()

print(f"\n→ Dense layer impact:")
print(f"  32→64 units: {dense_acc_diff_32_to_64*100:+.2f}% accuracy, +{param_diff_32_to_64:,} parameters")
print(f"  64→128 units: {dense_acc_diff_64_to_128*100:+.2f}% accuracy, +{param_diff_64_to_128:,} parameters")

best_dense = max(dense_results, key=lambda x: x[0]['best_val_acc'])
print(f"\n→ Best dense configuration: {best_dense[0]['name']} ({best_dense[0]['best_val_acc']*100:.2f}%)")

print("\n" + "=" * 70)
print("Visualize all experiments in TensorBoard:")
print("  tensorboard --logdir=logs/cnn_tuning")
print("=" * 70)


POOL SIZE COMPARISON

2x2 pooling:
  Parameters: 220,234
  Best validation accuracy: 98.84%
  Final validation accuracy: 98.83%
  Overfitting: 0.67%

3x3 pooling:
  Parameters: 56,394
  Best validation accuracy: 98.97%
  Final validation accuracy: 98.97%
  Overfitting: 0.50%

→ Pool size impact: 3x3 vs 2x2 accuracy difference: +0.12%
→ Analysis: 3x3 pooling performs better

DENSE UNITS COMPARISON

Dense 32 units:
  Parameters: 119,530
  Best validation accuracy: 98.86%
  Final validation accuracy: 98.82%
  Overfitting: 0.88%

Dense 64 units (baseline):
  Parameters: 220,234
  Best validation accuracy: 98.84%
  Final validation accuracy: 98.83%
  Overfitting: 0.67%

Dense 128 units:
  Parameters: 421,642
  Best validation accuracy: 98.94%
  Final validation accuracy: 98.71%
  Overfitting: 0.90%

→ Dense layer impact:
  32→64 units: -0.02% accuracy, +100,704 parameters
  64→128 units: +0.10% accuracy, +201,408 parameters

→ Best dense configuration: Dense 128 units (98.94%)

Visualize al

## Part 5: Analysis and Optimal Model (10 min)

### Task 5.1: TensorBoard Analysis


In [15]:
print("=" * 70)
print("COMPREHENSIVE EXPERIMENT ANALYSIS")
print("=" * 70)

all_experiments = [
    ("Filter [16, 32]", fewer_filters_history, fewer_filters_model, {"filters": [16, 32], "kernel": 3, "pool": 2, "dense": 64}),
    ("Filter [32, 64]", baseline_history, baseline_model, {"filters": [32, 64], "kernel": 3, "pool": 2, "dense": 64}),
    ("Filter [64, 128]", more_filters_history, more_filters_model, {"filters": [64, 128], "kernel": 3, "pool": 2, "dense": 64}),
    ("Kernel 5x5", large_kernel_history, large_kernel_model, {"filters": [32, 64], "kernel": 5, "pool": 2, "dense": 64}),
    ("Pool 3x3", pool3_history, pool3_model, {"filters": [32, 64], "kernel": 3, "pool": 3, "dense": 64}),
    ("Dense 32", dense32_history, dense32_model, {"filters": [32, 64], "kernel": 3, "pool": 2, "dense": 32}),
    ("Dense 128", dense128_history, dense128_model, {"filters": [32, 64], "kernel": 3, "pool": 2, "dense": 128}),
]

experiment_analysis = []
for name, history, model, config in all_experiments:
    analysis = analyze_convergence(history, name)
    train_accs = history.history['accuracy']
    val_accs = history.history['val_accuracy']
    overfitting = train_accs[-1] - val_accs[-1]
    
    experiment_analysis.append({
        **analysis,
        "config": config,
        "params": model.count_params(),
        "overfitting": overfitting,
        "model": model,
        "history": history
    })

print("\nAll Experiments Summary:")
print("-" * 70)
for exp in experiment_analysis:
    print(f"{exp['name']:20s} | Acc: {exp['best_val_acc']*100:5.2f}% | Params: {exp['params']:8,} | Overfit: {exp['overfitting']*100:4.2f}%")

print("\n" + "=" * 70)
print("BEST CONFIGURATION ANALYSIS")
print("=" * 70)

filter_experiments = [
    e for e in experiment_analysis 
    if e['name'] in ["Filter [16, 32]", "Filter [32, 64]", "Filter [64, 128]"]
]
best_filter_exp = max(filter_experiments, key=lambda x: x['best_val_acc'])
best_filter_config = best_filter_exp['config']['filters']

kernel_experiments = [
    e for e in experiment_analysis 
    if e['name'] in ["Filter [32, 64]", "Kernel 5x5"]
]
best_kernel_exp = max(kernel_experiments, key=lambda x: x['best_val_acc'])
best_kernel_size = best_kernel_exp['config']['kernel']

dense_experiments = [
    e for e in experiment_analysis 
    if e['name'] in ["Dense 32", "Filter [32, 64]", "Dense 128"]
]
best_dense_exp = max(dense_experiments, key=lambda x: x['best_val_acc'])
best_dense_units = best_dense_exp['config']['dense']

pool_experiments = [
    e for e in experiment_analysis 
    if e['name'] in ["Filter [32, 64]", "Pool 3x3"]
]
best_pool_exp = max(pool_experiments, key=lambda x: x['best_val_acc'])
best_pool_size = best_pool_exp['config']['pool']

print(f"\n1. Best filter configuration: {best_filter_config}")
print(f"   → Best: {best_filter_exp['name']} ({best_filter_exp['best_val_acc']*100:.2f}% accuracy)")

print(f"\n2. Best kernel size: {best_kernel_size}x{best_kernel_size}")
print(f"   → Best: {best_kernel_exp['name']} ({best_kernel_exp['best_val_acc']*100:.2f}% accuracy)")

print(f"\n3. Best dense layer size: {best_dense_units} units")
print(f"   → Best: {best_dense_exp['name']} ({best_dense_exp['best_val_acc']*100:.2f}% accuracy)")

print(f"\n4. Best pool size: {best_pool_size}x{best_pool_size}")
print(f"   → Best: {best_pool_exp['name']} ({best_pool_exp['best_val_acc']*100:.2f}% accuracy)")

print(f"\n5. Signs of overfitting:")
max_overfitting = max(experiment_analysis, key=lambda x: x['overfitting'])
min_overfitting = min(experiment_analysis, key=lambda x: x['overfitting'])
print(f"   → Highest overfitting: {max_overfitting['name']} ({max_overfitting['overfitting']*100:.2f}%)")
print(f"   → Lowest overfitting: {min_overfitting['name']} ({min_overfitting['overfitting']*100:.2f}%)")
avg_overfitting = sum(e['overfitting'] for e in experiment_analysis) / len(experiment_analysis)
print(f"   → Average overfitting: {avg_overfitting*100:.2f}%")
print(f"   → Analysis: {'Significant overfitting detected' if avg_overfitting > 0.01 else 'Overfitting is minimal'}")

print("\n" + "=" * 70)
print("OPTIMAL CONFIGURATION SUMMARY")
print("=" * 70)
print(f"Filters: {best_filter_config}")
print(f"Kernel size: {best_kernel_size}x{best_kernel_size}")
print(f"Pool size: {best_pool_size}x{best_pool_size}")
print(f"Dense units: {best_dense_units}")

print("\n" + "=" * 70)


COMPREHENSIVE EXPERIMENT ANALYSIS

All Experiments Summary:
----------------------------------------------------------------------
Filter [16, 32]      | Acc: 98.85% | Params:  105,866 | Overfit: 0.82%
Filter [32, 64]      | Acc: 98.84% | Params:  220,234 | Overfit: 0.67%
Filter [64, 128]     | Acc: 99.08% | Params:  476,618 | Overfit: 1.30%
Kernel 5x5           | Acc: 98.91% | Params:  253,514 | Overfit: 0.69%
Pool 3x3             | Acc: 98.97% | Params:   56,394 | Overfit: 0.50%
Dense 32             | Acc: 98.86% | Params:  119,530 | Overfit: 0.88%
Dense 128            | Acc: 98.94% | Params:  421,642 | Overfit: 0.90%

BEST CONFIGURATION ANALYSIS

1. Best filter configuration: [64, 128]
   → Best: Filter [64, 128] (99.08% accuracy)

2. Best kernel size: 5x5
   → Best: Kernel 5x5 (98.91% accuracy)

3. Best dense layer size: 128 units
   → Best: Dense 128 (98.94% accuracy)

4. Best pool size: 3x3
   → Best: Pool 3x3 (98.97% accuracy)

5. Signs of overfitting:
   → Highest overfitting: 

### Task 5.2: Train Optimal Model


In [16]:
optimal_history, optimal_model = train_cnn(
    filters=best_filter_config,
    kernel_size=best_kernel_size,
    pool_size=best_pool_size,
    dense_units=best_dense_units,
    experiment_name="optimal_final"
)



Training: optimal_final
Filters: [64, 128], Kernel: 5x5, Pool: 3x3, Dense: 128
Parameters: 355,466
Log directory: logs/cnn_tuning/optimal_final
Epoch 1/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.9302 - loss: 0.2306 - val_accuracy: 0.9804 - val_loss: 0.0640
Epoch 2/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 25ms/step - accuracy: 0.9833 - loss: 0.0532 - val_accuracy: 0.9862 - val_loss: 0.0472
Epoch 3/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.9881 - loss: 0.0365 - val_accuracy: 0.9862 - val_loss: 0.0458
Epoch 4/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 25ms/step - accuracy: 0.9913 - loss: 0.0270 - val_accuracy: 0.9827 - val_loss: 0.0567
Epoch 5/20
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - accuracy: 0.9928 - loss: 0.0223 - val_accuracy: 0.9908 - val_loss: 0.0334
Epoch 6/20
[1m375/375[0m [32m━━━━

### Optimal Model Evaluation


In [17]:
optimal_analysis = analyze_convergence(optimal_history, "Optimal Model")
test_loss, test_accuracy = optimal_model.evaluate(x_test, y_test_cat, verbose=0)

print("=" * 70)
print("OPTIMAL MODEL FINAL RESULTS")
print("=" * 70)
print(f"\nConfiguration:")
print(f"  Filters: {best_filter_config}")
print(f"  Kernel size: {best_kernel_size}x{best_kernel_size}")
print(f"  Pool size: {best_pool_size}x{best_pool_size}")
print(f"  Dense units: {best_dense_units}")
print(f"  Total parameters: {optimal_model.count_params():,}")

print(f"\nTraining Performance:")
print(f"  Best validation accuracy: {optimal_analysis['best_val_acc']*100:.2f}%")
print(f"  Final validation accuracy: {optimal_analysis['final_val_acc']*100:.2f}%")
print(f"  Convergence epoch: {optimal_analysis['convergence_epoch']}")

print(f"\nTest Performance:")
print(f"  Test accuracy: {test_accuracy*100:.2f}%")
print(f"  Test loss: {test_loss:.4f}")

train_accs = optimal_history.history['accuracy']
val_accs = optimal_history.history['val_accuracy']
overfitting = train_accs[-1] - val_accs[-1]
print(f"\nOverfitting Analysis:")
print(f"  Train accuracy: {train_accs[-1]*100:.2f}%")
print(f"  Validation accuracy: {val_accs[-1]*100:.2f}%")
print(f"  Overfitting gap: {overfitting*100:.2f}%")

print(f"\n{'='*70}")
if test_accuracy >= 0.985:
    print("✓ SUCCESS: Optimal model achieved >98.5% test accuracy!")
else:
    print(f"⚠ Model achieved {test_accuracy*100:.2f}% test accuracy (target: >98.5%)")
print(f"{'='*70}")

print("\nView all experiments in TensorBoard:")
print("  tensorboard --logdir=logs/cnn_tuning")


OPTIMAL MODEL FINAL RESULTS

Configuration:
  Filters: [64, 128]
  Kernel size: 5x5
  Pool size: 3x3
  Dense units: 128
  Total parameters: 355,466

Training Performance:
  Best validation accuracy: 99.08%
  Final validation accuracy: 98.87%
  Convergence epoch: 1

Test Performance:
  Test accuracy: 99.20%
  Test loss: 0.0232

Overfitting Analysis:
  Train accuracy: 99.62%
  Validation accuracy: 98.87%
  Overfitting gap: 0.75%

✓ SUCCESS: Optimal model achieved >98.5% test accuracy!

View all experiments in TensorBoard:
  tensorboard --logdir=logs/cnn_tuning
