<a href="https://colab.research.google.com/github/Bumble-beee/Handout_Smart_Sensing_Feb2026/blob/main/O3_example_v3_handout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup
import numpy as np
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
from tqdm import tqdm

def download_noisy_ozone(limit=480, stride=1, resize_dim=(256, 128), noise_level=0.05):
    """
    Downloads NASA Ozone frames, resizes them, and adds synthetic noise
    to simulate real-world sensor corruption.

    Args:
        limit: Maximum number of frames to download.
        stride: Keep every `stride`-th link of the sorted directory listing
            (applied before `limit`).
        resize_dim: Size handed to PIL's `Image.resize`, i.e. (width, height).
        noise_level: Standard deviation of the zero-mean Gaussian noise, on
            the normalized 0.0 - 1.0 intensity scale.

    Returns:
        noisy_data: The corrupted dataset for analysis, clipped to [0.0, 1.0].
        clean_data: The ground truth for checking results.
        Both are None when the download fails or no frames are found.
    """
    # Function-scope import so the cell does not depend on an edited import list.
    from urllib.parse import urljoin

    base_url = "https://svs.gsfc.nasa.gov/vis/a000000/a003900/a003973/frames/2048x1024_2x1_30p/2004-Dec-2005-Mar/O3CHEM/"
    # Without a timeout a stalled connection would hang the notebook forever.
    request_timeout = 30  # seconds

    print(f"Connecting to NASA: {base_url}")
    try:
        # One session reuses the TCP/TLS connection across all frame requests.
        with requests.Session() as session:
            response = session.get(base_url, timeout=request_timeout)
            # Fail loudly on 4xx/5xx instead of parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract links; urljoin handles relative and absolute hrefs alike.
            links = sorted(
                urljoin(base_url, a['href'])
                for a in soup.find_all('a')
                if a.get('href', '').endswith(('.tif', '.png'))
            )

            # Select subset
            selection = links[::stride][:limit]
            print(f"Found {len(links)} total frames. Downloading {len(selection)}...")

            if not selection:
                # An empty stack would only crash later when callers index [0].
                print("Download failed: no .tif/.png frames were selected.")
                return None, None

            frames = []
            for url in tqdm(selection):
                r = session.get(url, timeout=request_timeout)
                r.raise_for_status()
                with Image.open(BytesIO(r.content)) as img:
                    # Resize and Grayscale
                    gray = img.resize(resize_dim).convert('L')
                # Normalize to 0.0 - 1.0
                frames.append(np.array(gray, dtype=np.float32) / 255.0)

        clean_data = np.array(frames)

        # --- INJECT NOISE ---
        # ':g' avoids float artefacts such as "5.000000000000001%".
        print(f"Injecting {noise_level * 100:g}% Gaussian Noise...")
        noise = np.random.normal(0, noise_level, clean_data.shape)

        # Clip values to stay within valid image range [0.0, 1.0]
        noisy_data = np.clip(clean_data + noise, 0, 1)

        return noisy_data, clean_data

    except (requests.RequestException, OSError, ValueError) as e:
        # Network errors, unreadable images (PIL raises OSError subclasses) and
        # invalid arguments (e.g. stride=0, negative noise_level) keep the
        # original "report and return (None, None)" contract; genuine
        # programming errors are no longer swallowed.
        print(f"Download failed: {e}")
        return None, None

# --- RUN DOWNLOAD ---
# We grab 480 frames (approx 1 month of data)
# The noise level is defined once so the plot title below always reports the
# value that was actually used to corrupt the data.
NOISE_LEVEL = 0.05
noisy_ozone, clean_ozone = download_noisy_ozone(limit=480, stride=1, resize_dim=(256, 128), noise_level=NOISE_LEVEL)

# Skip the preview when the download failed (None) or produced no frames,
# since indexing frame 0 would raise in either case.
if noisy_ozone is not None and len(noisy_ozone) > 0:
    print(f"Data Shape: {noisy_ozone.shape}") # (480, 128, 256)

    # --- VISUALIZATION CHECK ---
    # Compare Clean vs Noisy to verify the difficulty
    fig, ax = plt.subplots(1, 2, figsize=(12, 5))

    # Plot Ground Truth
    ax[0].imshow(clean_ozone[0], cmap='viridis', vmin=0, vmax=1)
    ax[0].set_title("Ground Truth (Hidden)")

    # Plot Noisy Input
    ax[1].imshow(noisy_ozone[0], cmap='viridis', vmin=0, vmax=1)
    ax[1].set_title(f"Noisy Input (Noise Level: {NOISE_LEVEL})")

    plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

def create_comparison_animation(clean_frames, noisy_frames):
    """
    Build a two-panel animation that steps through the clean and noisy
    frame stacks in lockstep.

    Args:
        clean_frames: Indexable sequence of 2D frames shown on the left.
        noisy_frames: Indexable sequence of 2D frames shown on the right;
            indexed with the same frame numbers as `clean_frames`.

    Returns:
        The matplotlib FuncAnimation, with one step per entry of
        `clean_frames`.
    """
    def caption(idx):
        # Single place that formats the shared figure title.
        return f"Antarctic Ozone Dynamics (Frame {idx})"

    # Colour limits are pinned so both panels share one colour scale for
    # every frame.
    shared_style = dict(cmap='viridis', vmin=0, vmax=1)

    figure, panels = plt.subplots(1, 2, figsize=(12, 5))
    plt.close()  # Keep the static figure out of the notebook cell output

    left_panel, right_panel = panels
    truth_img = left_panel.imshow(clean_frames[0], **shared_style)
    noisy_img = right_panel.imshow(noisy_frames[0], **shared_style)

    # Label each panel and drop the tick marks for a cleaner look.
    for panel, label in ((left_panel, "Ground Truth (Hidden)"),
                         (right_panel, "Noisy Sensor Input")):
        panel.set_title(label)
        panel.axis('off')

    heading = figure.suptitle(caption(0))

    def update(frame_idx):
        # Swap in the requested frame on both panels and refresh the title.
        truth_img.set_data(clean_frames[frame_idx])
        noisy_img.set_data(noisy_frames[frame_idx])
        heading.set_text(caption(frame_idx))
        return [truth_img, noisy_img, heading]

    n_frames = len(clean_frames)
    print(f"Rendering Side-by-Side Animation ({n_frames} frames)...")
    # blit=False is often more stable in Colab for text updates, but True is faster
    anim = animation.FuncAnimation(
        figure,
        update,
        frames=n_frames,
        interval=50,
        blit=False,
    )

    return anim

# --- Run Animation ---
# Ensure you ran the download code from the previous step first!
# The download cell sets both variables to None when it fails, so checking
# that the names merely exist is not enough: passing None to the animation
# builder would raise a TypeError on the first frame lookup.
if globals().get('noisy_ozone') is not None and globals().get('clean_ozone') is not None:
    # We pass both the clean and noisy arrays we created earlier
    anim = create_comparison_animation(clean_ozone, noisy_ozone)
    display(HTML(anim.to_jshtml()))
else:
    print("Error: Data variables ('noisy_ozone', 'clean_ozone') not found or empty (download failed).")
    print("Please run the download/noise-injection block first.")