In [9]:
# =============================================================
# STEP 1: Install Dependencies (run this only once)
# =============================================================
!pip install opencv-python-headless matplotlib tensorflow tqdm

# =============================================================
# STEP 2: Import Libraries
# =============================================================
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython.display import Video, display
import os

print("TensorFlow version:", tf.__version__)

# =============================================================
# STEP 3: Load and Preprocess MNIST Data
# =============================================================
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

# reshape and normalize
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype("float32") / 255.0
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1).astype("float32") / 255.0

# =============================================================
# STEP 4: Build CNN Model
# =============================================================
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# =============================================================
# STEP 5: Train the Model
# =============================================================
print("🧠 Training model (takes about 2–3 minutes)...")
model.fit(train_images, train_labels, epochs=3, batch_size=128, verbose=1)

# Evaluate quickly
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f"✅ Test Accuracy: {test_acc:.4f}")

# =============================================================
# STEP 6: Helper Function to Preprocess Each Video Frame
# =============================================================
def preprocess_frame(frame):
    """Convert video frame to normalized 28x28 grayscale for prediction."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (28, 28))
    gray = 1 - (gray / 255.0)  # invert colors (white digit on black background)
    gray = np.expand_dims(gray, axis=(0, -1))  # shape (1, 28, 28, 1)
    return gray

# =============================================================
# STEP 7: Load Your MNIST Dream Video
# =============================================================
video_path = "mnist_dream.mp4"  # Make sure your file is named like this
if not os.path.exists(video_path):
    raise FileNotFoundError(f"❌ File '{video_path}' not found. Please upload it here.")

cap = cv2.VideoCapture(video_path)
frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"🎥 Processing {frame_total} frames...")

# =============================================================
# STEP 8: Predict and Save Annotated Output Video
# =============================================================
output_path = "mnist_predicted.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
vw = None

for _ in tqdm(range(frame_total), desc="Processing Frames"):
    ret, frame = cap.read()
    if not ret:
        break

    # preprocess and predict
    x = preprocess_frame(frame)
    preds = model.predict(x, verbose=0)[0]
    guess = np.argmax(preds)
    conf = np.max(preds) * 100

    # resize for display
    disp_frame = cv2.resize(frame, (640, 640))

    # ========= TEXT VISIBILITY FIX =========
    text = f"Prediction: {guess} ({conf:.1f}%)"
    # black outline
    cv2.putText(disp_frame, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 0), 5)
    # white text on top
    cv2.putText(disp_frame, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 255, 255), 2)

    # initialize writer
    if vw is None:
        h, w, _ = disp_frame.shape
        vw = cv2.VideoWriter(output_path, fourcc, 30.0, (w, h))

    vw.write(disp_frame)

cap.release()
if vw:
    vw.release()

print(f"\n✅ Done! Saved output video as '{output_path}'")

# =============================================================
# STEP 9: Display the Output Video Inside Notebook
# =============================================================
display(Video(output_path, embed=True))


Defaulting to user installation because normal site-packages is not writeable
TensorFlow version: 2.20.0
🧠 Training model (takes about 2–3 minutes)...
Epoch 1/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 125ms/step - accuracy: 0.9260 - loss: 0.2434
Epoch 2/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 128ms/step - accuracy: 0.9755 - loss: 0.0811
Epoch 3/3
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 127ms/step - accuracy: 0.9804 - loss: 0.0648
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.9874 - loss: 0.0366
✅ Test Accuracy: 0.9874
🎥 Processing 481 frames...


Processing Frames: 100%|█████████████████████████████████████████████████████████████| 481/481 [01:22<00:00,  5.80it/s]



✅ Done! Saved output video as 'mnist_predicted.mp4'
