In [None]:
from google.colab import drive
!fusermount -u /content/drive 2>/dev/null
!rm -rf /content/drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# ============================================================
# Colab + Google Drive SAFE Wi-Fi Spectrogram Dataset Generator
# ============================================================

import numpy as np
import os
from scipy.signal import spectrogram
from tqdm import tqdm
from datetime import datetime
from google.colab import drive
import matplotlib.pyplot as plt

# --- Make sure Google Drive is available ---
drive.mount('/content/drive')

# --- Output layout: <drive_dir>/images for previews, <drive_dir>/npz for data ---
drive_dir = "/content/drive/MyDrive/wifi_dataset"
img_dir, npz_dir = (os.path.join(drive_dir, sub) for sub in ("images", "npz"))
for out_dir in (drive_dir, img_dir, npz_dir):
    os.makedirs(out_dir, exist_ok=True)

print(f" All generated data will be saved permanently at: {drive_dir}")

# --- WiFi OFDM numerology (802.11a/g, 20 MHz channel) ---
N_FFT = 64   # length of the frequency-domain vector fed to the IFFT
N_CP = 16    # number of tail samples copied in front as cyclic prefix
# Occupied bins of the centered spectrum: 6..58, skipping bin 32 (the bin that
# ifftshift moves to index 0, i.e. DC).
SC_INDICES = [k for k in range(6, 59) if k != 32]
N_DATA_SC = len(SC_INDICES)
SYMBOL_SAMPLES = N_CP + N_FFT  # 80 samples

def create_ofdm_symbol():
    """Build one random QPSK-loaded OFDM symbol in the time domain.

    Returns:
        1-D complex array of ``N_CP + N_FFT`` samples: the last ``N_CP``
        samples of the IFFT output (cyclic prefix) followed by the full
        IFFT output.
    """
    # Draw the I signs first, then the Q signs, from the global NumPy RNG.
    i_signs = np.random.choice([-1, 1], N_DATA_SC)
    q_signs = np.random.choice([-1, 1], N_DATA_SC)

    # Load unit-power QPSK points onto the occupied bins of a centered spectrum.
    spectrum = np.zeros(N_FFT, dtype=complex)
    spectrum[SC_INDICES] = (i_signs + 1j * q_signs) / np.sqrt(2)

    # Undo the centering so bin order matches what np.fft.ifft expects.
    body = np.fft.ifft(np.fft.ifftshift(spectrum))
    return np.concatenate([body[-N_CP:], body])

def generate_wifi_packet(num_symbols):
    """Generates a single WiFi packet of N back-to-back OFDM symbols.

    Args:
        num_symbols: Number of OFDM symbols to concatenate.

    Returns:
        1-D complex array holding the symbols in generation order. When
        ``num_symbols`` is zero or negative an empty complex array is
        returned (``np.concatenate`` would otherwise raise ``ValueError``
        on the empty list).
    """
    if num_symbols <= 0:
        return np.zeros(0, dtype=complex)
    return np.concatenate([create_ofdm_symbol() for _ in range(num_symbols)])

# --- Simulation Parameters ---
fs = 20e6  # sample rate: 20 MHz
snr_db = 10
sim_time = 0.05  # Shorter window = faster (50 ms)
center_frequency = 2437e6  # Hz (2437 MHz)

# --- Output Count ---
NUM_SAMPLES = 300  #  Adjust to 200–300 now; client can regenerate full later
print(f"Starting generation of {NUM_SAMPLES} Wi-Fi spectrograms...")

# --- Precompute master packet and signal power ---
# The master packet is long enough for the longest burst (2.5 ms); each run
# later takes a prefix of it.
max_packet_samples = int(0.0025 * fs)
max_symbols_needed = int(np.ceil(max_packet_samples / SYMBOL_SAMPLES))
master_packet = generate_wifi_packet(max_symbols_needed)
signal_power = np.mean(np.abs(master_packet)**2)

# --- Checkpoint file for safe resume ---
# The file holds the index of the last sample known to be written.
checkpoint_file = os.path.join(drive_dir, "checkpoint.txt")
start_idx = 1

if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        checkpoint_text = f.read().strip()
    try:
        start_idx = max(1, int(checkpoint_text) + 1)
    except ValueError:
        # An interrupted write can leave the file empty or truncated; fall
        # back to a full run instead of crashing before any work is done.
        print(f" Checkpoint {checkpoint_file} is empty or corrupt; starting from sample 1.")
    else:
        print(f" Resuming from sample {start_idx}...")

# --- Main Loop ---
# Values that are identical for every run are computed once, up front.
total_samples = int(sim_time * fs)
snr_linear = 10 ** (snr_db / 10.0)
noise_power = signal_power / snr_linear
noise_std = np.sqrt(noise_power / 2.0)  # std of each of the I and Q noise components

for run_idx in tqdm(range(start_idx, NUM_SAMPLES + 1)):
    iq_clean = np.zeros(total_samples, dtype=complex)

    # Random packet length and start time
    beacon_duration = np.random.uniform(0.0015, 0.0025)  # 1.5–2.5 ms
    packet_samples = int(beacon_duration * fs)
    start_time = np.random.uniform(0, sim_time - beacon_duration)
    start_sample = int(start_time * fs)

    # Insert packet (a prefix of the precomputed master packet)
    iq_clean[start_sample:start_sample + packet_samples] = master_packet[:packet_samples]

    # Add complex white Gaussian noise at the configured SNR (snr_db)
    noise = noise_std * (np.random.randn(total_samples) + 1j * np.random.randn(total_samples))
    iq_noisy = iq_clean + noise

    # Spectrogram (two-sided because the input is complex)
    f, t, Sxx = spectrogram(
        iq_noisy, fs=fs, nperseg=512, noverlap=256,
        return_onesided=False, detrend=False, scaling='density'
    )
    # Small floor avoids log10(0); fftshift puts negative frequencies first.
    Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
    Sxx_dB_shifted = np.fft.fftshift(Sxx_dB, axes=0)

    # Save compactly to Drive (float32)
    npz_path = os.path.join(npz_dir, f"wifi_random_beacon_{run_idx:04d}.npz")
    np.savez_compressed(npz_path, spectrogram_data=Sxx_dB_shifted.astype(np.float32))

    # Optional: save a preview image for the first few runs (for sanity check)
    if run_idx <= 5:
        plt.imsave(os.path.join(img_dir, f"wifi_random_beacon_{run_idx:04d}.png"),
                   Sxx_dB_shifted, cmap='inferno')

    # Checkpoint every 10 samples AND after the final sample. Without the
    # second condition a NUM_SAMPLES that is not a multiple of 10 would leave
    # the checkpoint behind, and a re-run would regenerate (and overwrite)
    # the trailing samples.
    if run_idx % 10 == 0 or run_idx == NUM_SAMPLES:
        with open(checkpoint_file, "w") as f:
            f.write(str(run_idx))
        drive.flush_and_unmount()  # make sure data is written to Drive
        drive.mount('/content/drive')  # re-mount immediately

print(" Dataset generation complete.")
print(f"Spectrograms saved permanently to: {npz_dir}")
print(f"Preview images saved to: {img_dir}")
print(f"Checkpoint file: {checkpoint_file}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 All generated data will be saved permanently at: /content/drive/MyDrive/wifi_dataset
Starting generation of 300 Wi-Fi spectrograms...
 Resuming from sample 71...


  4%|▍         | 10/230 [00:23<17:43,  4.84s/it]

Mounted at /content/drive


  9%|▊         | 20/230 [00:45<18:14,  5.21s/it]

Mounted at /content/drive


 13%|█▎        | 30/230 [01:04<15:24,  4.62s/it]

Mounted at /content/drive


 17%|█▋        | 40/230 [01:24<14:50,  4.69s/it]

Mounted at /content/drive


 22%|██▏       | 50/230 [01:47<16:36,  5.54s/it]

Mounted at /content/drive


 26%|██▌       | 60/230 [02:10<15:09,  5.35s/it]

Mounted at /content/drive


 30%|███       | 70/230 [02:30<13:16,  4.98s/it]

Mounted at /content/drive


 35%|███▍      | 80/230 [02:49<11:12,  4.48s/it]

Mounted at /content/drive


 39%|███▉      | 90/230 [03:09<10:58,  4.70s/it]

Mounted at /content/drive


 43%|████▎     | 100/230 [03:28<09:07,  4.21s/it]

Mounted at /content/drive


 48%|████▊     | 110/230 [03:47<08:46,  4.39s/it]

Mounted at /content/drive


 52%|█████▏    | 120/230 [04:05<07:40,  4.18s/it]

Mounted at /content/drive


 57%|█████▋    | 130/230 [04:23<06:58,  4.19s/it]

Mounted at /content/drive


 61%|██████    | 140/230 [04:41<06:00,  4.01s/it]

Mounted at /content/drive


 65%|██████▌   | 150/230 [05:01<06:04,  4.56s/it]

Mounted at /content/drive


 70%|██████▉   | 160/230 [05:18<04:47,  4.10s/it]

Mounted at /content/drive


 74%|███████▍  | 170/230 [05:39<04:47,  4.79s/it]

Mounted at /content/drive


 78%|███████▊  | 180/230 [05:56<03:23,  4.07s/it]

Mounted at /content/drive


 83%|████████▎ | 190/230 [06:16<03:03,  4.58s/it]

Mounted at /content/drive


 87%|████████▋ | 200/230 [06:36<02:25,  4.85s/it]

Mounted at /content/drive


 91%|█████████▏| 210/230 [07:01<02:00,  6.04s/it]

Mounted at /content/drive


 96%|█████████▌| 220/230 [07:21<00:45,  4.52s/it]

Mounted at /content/drive


100%|██████████| 230/230 [07:39<00:00,  2.00s/it]

Mounted at /content/drive
 Dataset generation complete.
Spectrograms saved permanently to: /content/drive/MyDrive/wifi_dataset/npz
Preview images saved to: /content/drive/MyDrive/wifi_dataset/images
Checkpoint file: /content/drive/MyDrive/wifi_dataset/checkpoint.txt





In [None]:
# ============================================================
#  Colab + Drive SAFE Bluetooth (GFSK Hopping) Spectrogram Generator
# ============================================================

import numpy as np
from scipy.signal import spectrogram, windows
import os
from tqdm import tqdm
from google.colab import drive

# --- MOUNT DRIVE ---
drive.mount('/content/drive')

# --- SAVE PATHS ---
drive_dir = "/content/drive/MyDrive/bluetooth_dataset"
npz_dir = os.path.join(drive_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)

print(f" All Bluetooth spectrograms will be saved to: {npz_dir}")

# --- Simulation Parameters ---
fs = 20e6          # 20 MHz sample rate
snr_db = 10
sim_time = 0.05     # shorter (50 ms) for faster runtime in Colab
center_frequency = 2437e6  # 2.437 GHz (Wi-Fi mid-band)
total_samples = int(sim_time * fs)

# --- Bluetooth Parameters ---
symbol_rate = 1e6                     # 1 Msym/s
samples_per_symbol = int(fs / symbol_rate)  # 20
hop_rate = 1600                       # 1600 hops/s
hop_duration_sec = 1.0 / hop_rate     # 625 microseconds
# round() instead of bare int(): the products are float results of 1/1600 and
# must not be truncated to one sample/symbol short by rounding error, or the
# modulated hop and its carrier would end up with different lengths.
hop_duration_samples = int(round(hop_duration_sec * fs))  # 12,500 samples
symbols_per_hop = int(round(hop_duration_sec * symbol_rate))  # 625 symbols

bt_channels = np.arange(2402e6, 2481e6, 1e6)
print(f"Simulating hops across {len(bt_channels)} Bluetooth Classic channels (2402–2480 MHz).")

# --- Output Config ---
NUM_SAMPLES = 300   #  Adjust (200–300 for demo, client can regen full 5000 later)
# The checkpoint file holds the index of the last sample known to be written.
checkpoint_file = os.path.join(drive_dir, "bt_checkpoint.txt")
start_idx = 1

if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        checkpoint_text = f.read().strip()
    try:
        start_idx = max(1, int(checkpoint_text) + 1)
    except ValueError:
        # An interrupted write can leave the file empty or truncated; fall
        # back to a full run instead of crashing before any work is done.
        print(f" Checkpoint {checkpoint_file} is empty or corrupt; starting from sample 1.")
    else:
        print(f" Resuming from sample {start_idx}...")

# --- GFSK Modulation ---
def gfsk_modulate(num_symbols, samples_per_symbol, modulation_index=0.32, bt=0.5):
    """Generate a random, unit-envelope GFSK baseband burst.

    Random bits are mapped to +/-1, held for ``samples_per_symbol`` samples,
    smoothed with a normalized Gaussian filter, and integrated into phase.

    Args:
        num_symbols: Number of random symbols (bits) to modulate.
        samples_per_symbol: Oversampling factor (samples per symbol).
        modulation_index: Phase advances by ``pi * modulation_index`` over one
            unfiltered symbol.
        bt: Bandwidth-time product of the Gaussian filter. It now actually
            controls the filter width (previously the argument was ignored
            and the width was fixed at 0.35 symbols).

    Returns:
        1-D complex array of ``num_symbols * samples_per_symbol`` samples
        with magnitude 1.

    Raises:
        ValueError: If ``bt`` is not positive.
    """
    if bt <= 0:
        raise ValueError("bt must be positive")

    bits = np.random.randint(0, 2, num_symbols)
    nrz = bits * 2 - 1
    x_rect = np.repeat(nrz, samples_per_symbol)

    # Filter spans 4 symbols; forced to odd length so it has a center tap.
    gauss_len_symbols = 4
    gauss_len_samples = gauss_len_symbols * samples_per_symbol
    if gauss_len_samples % 2 == 0:
        gauss_len_samples += 1

    # Gaussian pulse-shaping filter with 3 dB bandwidth B and symbol time T:
    # sigma / T = sqrt(ln 2) / (2 * pi * B * T).
    std_samples = np.sqrt(np.log(2.0)) / (2.0 * np.pi * bt) * samples_per_symbol
    gauss_window = windows.gaussian(gauss_len_samples, std=std_samples)
    gauss_window /= np.sum(gauss_window)  # unity DC gain keeps the deviation unchanged

    x_filtered = np.convolve(x_rect, gauss_window, mode='same')
    phase_step = (np.pi * modulation_index) / samples_per_symbol
    phase = np.cumsum(x_filtered * phase_step)

    iq_signal = np.exp(1j * phase)
    return iq_signal

# --- Main Loop ---
# Values that are identical for every run / hop are computed once, up front.
total_hop_slots = int(np.floor(total_samples / hop_duration_samples))
snr_linear = 10 ** (snr_db / 10.0)
t_hop = np.arange(hop_duration_samples) / fs  # time axis of one hop slot

for run_idx in tqdm(range(start_idx, NUM_SAMPLES + 1)):
    iq_clean = np.zeros(total_samples, dtype=complex)
    current_sample = 0
    signal_power_acc = 0
    num_visible_hops = 0

    for _ in range(total_hop_slots):
        hop_freq_abs = np.random.choice(bt_channels)
        hop_freq_rel = hop_freq_abs - center_frequency

        # Only simulate if hop falls inside our 20 MHz observation window
        if np.abs(hop_freq_rel) < (fs / 2):
            gfsk_baseband = gfsk_modulate(symbols_per_hop, samples_per_symbol)
            carrier = np.exp(1j * 2 * np.pi * hop_freq_rel * t_hop)
            hop_signal = gfsk_baseband * carrier

            iq_clean[current_sample:current_sample + hop_duration_samples] = hop_signal
            signal_power_acc += np.mean(np.abs(hop_signal)**2)
            num_visible_hops += 1

        # Out-of-window hops still consume their time slot.
        current_sample += hop_duration_samples

    # --- Add noise ---
    if num_visible_hops > 0:
        signal_power = signal_power_acc / num_visible_hops
    else:
        # No hop landed in the window. gfsk_modulate returns exp(1j * phase),
        # i.e. unit power, so use 1.0 as the reference. The previous 1e-9
        # fallback pushed the noise below the 1e-12 log floor and produced a
        # capture whose noise floor was unlike every other sample.
        signal_power = 1.0

    noise_power = signal_power / snr_linear
    noise_std = np.sqrt(noise_power / 2.0)  # std of each of the I and Q noise components
    noise = noise_std * (np.random.randn(total_samples) + 1j * np.random.randn(total_samples))
    iq_noisy = iq_clean + noise

    # --- Spectrogram ---
    f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=512, noverlap=256, return_onesided=False)
    Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
    Sxx_dB_shifted = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

    # --- Save to Drive ---
    npz_path = os.path.join(npz_dir, f"bt_spectrogram_{run_idx:04d}.npz")
    np.savez_compressed(npz_path, spectrogram_data=Sxx_dB_shifted)

    # --- Save checkpoint every 10 runs and after the final run ---
    # The second condition keeps the checkpoint current when NUM_SAMPLES is
    # not a multiple of 10, so a re-run does not regenerate trailing samples.
    if run_idx % 10 == 0 or run_idx == NUM_SAMPLES:
        with open(checkpoint_file, "w") as f:
            f.write(str(run_idx))
        drive.flush_and_unmount()
        drive.mount('/content/drive')

print(" Bluetooth spectrogram generation complete.")
print(f"Saved {NUM_SAMPLES} files to: {npz_dir}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 All Bluetooth spectrograms will be saved to: /content/drive/MyDrive/bluetooth_dataset/npz
Simulating hops across 79 Bluetooth Classic channels (2402–2480 MHz).


  3%|▎         | 10/300 [00:19<22:01,  4.56s/it]

Mounted at /content/drive


  7%|▋         | 20/300 [00:40<21:43,  4.66s/it]

Mounted at /content/drive


 10%|█         | 30/300 [00:58<18:12,  4.05s/it]

Mounted at /content/drive


 13%|█▎        | 40/300 [01:17<19:01,  4.39s/it]

Mounted at /content/drive


 17%|█▋        | 50/300 [01:35<16:50,  4.04s/it]

Mounted at /content/drive


 20%|██        | 60/300 [01:56<19:34,  4.89s/it]

Mounted at /content/drive


 23%|██▎       | 70/300 [02:19<20:47,  5.42s/it]

Mounted at /content/drive


 27%|██▋       | 80/300 [02:38<16:45,  4.57s/it]

Mounted at /content/drive


 30%|███       | 90/300 [03:00<17:33,  5.02s/it]

Mounted at /content/drive


 33%|███▎      | 100/300 [03:21<16:12,  4.86s/it]

Mounted at /content/drive


 37%|███▋      | 110/300 [03:41<14:19,  4.52s/it]

Mounted at /content/drive


 40%|████      | 120/300 [04:02<14:35,  4.87s/it]

Mounted at /content/drive


 43%|████▎     | 130/300 [04:24<14:18,  5.05s/it]

Mounted at /content/drive


 47%|████▋     | 140/300 [04:43<12:06,  4.54s/it]

Mounted at /content/drive


 50%|█████     | 150/300 [05:05<12:33,  5.02s/it]

Mounted at /content/drive


 53%|█████▎    | 160/300 [05:26<10:46,  4.62s/it]

Mounted at /content/drive


 57%|█████▋    | 170/300 [05:44<08:54,  4.11s/it]

Mounted at /content/drive


 60%|██████    | 180/300 [06:03<08:29,  4.25s/it]

Mounted at /content/drive


 63%|██████▎   | 190/300 [06:23<08:25,  4.59s/it]

Mounted at /content/drive


 67%|██████▋   | 200/300 [06:43<07:20,  4.41s/it]

Mounted at /content/drive


 70%|███████   | 210/300 [07:03<07:05,  4.73s/it]

Mounted at /content/drive


 73%|███████▎  | 220/300 [07:22<05:55,  4.44s/it]

Mounted at /content/drive


 77%|███████▋  | 230/300 [09:40<46:32, 39.90s/it]

Mounted at /content/drive


 80%|████████  | 240/300 [11:59<41:07, 41.13s/it]

Mounted at /content/drive


 83%|████████▎ | 250/300 [14:17<34:05, 40.92s/it]

Mounted at /content/drive


 87%|████████▋ | 260/300 [16:35<27:24, 41.12s/it]

Mounted at /content/drive


 90%|█████████ | 270/300 [18:53<20:26, 40.89s/it]

Mounted at /content/drive


 93%|█████████▎| 280/300 [21:11<13:37, 40.85s/it]

Mounted at /content/drive


 97%|█████████▋| 290/300 [23:29<06:49, 40.99s/it]

Mounted at /content/drive


100%|██████████| 300/300 [25:47<00:00,  5.16s/it]

Mounted at /content/drive
 Bluetooth spectrogram generation complete.
Saved 300 files to: /content/drive/MyDrive/bluetooth_dataset/npz





In [None]:
# ============================================================
# Colab + Drive Safe ZigBee Spectrogram Dataset Generator
# ============================================================

import numpy as np
from scipy.signal import spectrogram, windows
import os
from tqdm import tqdm
from google.colab import drive

# --- MOUNT DRIVE ---
# Must run before any path under /content/drive is touched.
drive.mount('/content/drive')

# --- OUTPUT PATHS ---
# One compressed .npz matrix per run is written into <drive_dir>/npz.
drive_dir = "/content/drive/MyDrive/zigbee_dataset"
npz_dir = os.path.join(drive_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)  # also creates drive_dir when missing
print(f" ZigBee spectrograms will be saved in: {npz_dir}")

# ============================================================
# --- ZigBee DSSS spreading sequences (IEEE 802.15.4) ---
# ============================================================
# Chip sequence (c0..c31) of data symbol 0 in the IEEE 802.15.4 2.4 GHz
# O-QPSK symbol-to-chip mapping. The previous table held only two sequences,
# and neither matched the standard.
_DSSS_SYMBOL0 = np.array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
                          0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0])
# 1 at every odd chip index; XOR-ing with it inverts exactly those chips.
_DSSS_ODD_CHIP_MASK = np.arange(32) % 2

# Symbols 0..7 are symbol 0 cyclically right-shifted by 4 chips per step;
# symbols 8..15 are the same sequences with the odd-indexed chips inverted.
DSSS_TABLE = {}
for _sym in range(8):
    _seq = np.roll(_DSSS_SYMBOL0, 4 * _sym)
    DSSS_TABLE[_sym] = _seq
    DSSS_TABLE[_sym + 8] = _seq ^ _DSSS_ODD_CHIP_MASK

# ============================================================
# --- Helper Functions ---
# ============================================================
def zigbee_signal(num_symbols=100):
    """Return the chip stream for ``num_symbols`` randomly chosen symbols.

    Each symbol is drawn uniformly from the keys of ``DSSS_TABLE`` and
    replaced by its chip sequence; the sequences are joined end to end into
    one flat array.
    """
    picked_sequences = [
        DSSS_TABLE[np.random.choice(list(DSSS_TABLE.keys()))]
        for _ in range(num_symbols)
    ]
    return np.array([chip for sequence in picked_sequences for chip in sequence])

def oqpsk_modulate(chips, samples_per_chip=10):
    """Map a 0/1 chip stream to rectangular-pulse offset-QPSK samples.

    Even-indexed chips drive the I rail and odd-indexed chips the Q rail,
    each mapped to +/-1 and held for ``samples_per_chip`` samples. The Q rail
    is delayed by ``samples_per_chip // 2`` samples.

    Args:
        chips: Sequence of 0/1 chips (even length, so both rails match).
        samples_per_chip: Samples each I/Q chip is held for.

    Returns:
        1-D complex array of ``len(chips) // 2 * samples_per_chip`` samples.
    """
    # NOTE(review): one I/Q chip pair occupies samples_per_chip samples here;
    # confirm this matches the intended on-air chip rate.
    chips_pm = np.asarray(chips) * 2 - 1
    i_signal = np.repeat(chips_pm[0::2], samples_per_chip)
    q_signal = np.repeat(chips_pm[1::2], samples_per_chip)

    # Delay Q with zero fill rather than np.roll: a circular shift wraps the
    # tail of the last Q chip around to the very start of the burst.
    offset = samples_per_chip // 2
    if offset > 0:
        delayed_q = np.zeros_like(q_signal)
        delayed_q[offset:] = q_signal[:-offset]
        q_signal = delayed_q
    return i_signal + 1j * q_signal

# ============================================================
# --- Simulation Parameters ---
# ============================================================
fs = 20e6                  # sample rate (Hz)
snr_db = 10
sim_time = 0.05  # shorter for Colab efficiency
center_frequency = 2437e6  # Hz
chip_rate = 2e6
samples_per_chip = int(fs / chip_rate)
beacon_duration = 0.004064  # ~4 ms ZigBee frame

# ============================================================
# --- Configurable number of samples ---
# ============================================================
NUM_SAMPLES = 300  #  Adjust for demo (client can regenerate full 5000)
# The checkpoint file holds the index of the last sample known to be written.
checkpoint_file = os.path.join(drive_dir, "zigbee_checkpoint.txt")
start_idx = 1

if os.path.exists(checkpoint_file):
    with open(checkpoint_file, "r") as f:
        checkpoint_text = f.read().strip()
    try:
        start_idx = max(1, int(checkpoint_text) + 1)
    except ValueError:
        # An interrupted write can leave the file empty or truncated; fall
        # back to a full run instead of crashing before any work is done.
        print(f" Checkpoint {checkpoint_file} is empty or corrupt; starting from sample 1.")
    else:
        print(f" Resuming from sample {start_idx}...")

# ============================================================
# --- Main Simulation Loop ---
# ============================================================
# Values that are identical for every run are computed once, up front.
total_samples = int(sim_time * fs)
nominal_packet_samples = int(beacon_duration * fs)
# oqpsk_modulate emits samples_per_chip samples per pair of chips, hence the 2.
total_chips_needed = int(np.ceil((nominal_packet_samples * 2) / samples_per_chip))
symbols_needed = int(np.ceil(total_chips_needed / 32))  # 32 chips per symbol
latest_start_time = sim_time - beacon_duration
snr_linear = 10 ** (snr_db / 10.0)

for run_idx in tqdm(range(start_idx, NUM_SAMPLES + 1)):
    iq_clean = np.zeros(total_samples, dtype=complex)

    # --- Generate random ZigBee packet (fresh chips every run) ---
    master_chips = zigbee_signal(num_symbols=symbols_needed)
    master_packet = oqpsk_modulate(master_chips, samples_per_chip)

    signal_power = np.mean(np.abs(master_packet) ** 2)

    # --- Random start time ---
    start_time = np.random.uniform(0, latest_start_time)
    start_sample = int(start_time * fs)
    # Clamp so the burst never runs past the end of the capture.
    end_sample = min(start_sample + nominal_packet_samples, total_samples)
    packet_samples = end_sample - start_sample

    iq_clean[start_sample:end_sample] = master_packet[:packet_samples]

    # --- Add noise ---
    noise_power = signal_power / snr_linear
    noise_std = np.sqrt(noise_power / 2.0)  # std of each of the I and Q noise components
    noise = noise_std * (np.random.randn(total_samples) + 1j * np.random.randn(total_samples))
    iq_noisy = iq_clean + noise

    # --- Spectrogram ---
    f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=512, noverlap=256, return_onesided=False)
    Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
    Sxx_dB_shifted = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

    # --- Save matrix ---
    npz_path = os.path.join(npz_dir, f"zigbee_spectrogram_{run_idx:04d}.npz")
    np.savez_compressed(npz_path, spectrogram_data=Sxx_dB_shifted)

    # --- Checkpoint every 10 runs and after the final run ---
    # The second condition keeps the checkpoint current when NUM_SAMPLES is
    # not a multiple of 10, so a re-run does not regenerate trailing samples.
    if run_idx % 10 == 0 or run_idx == NUM_SAMPLES:
        with open(checkpoint_file, "w") as f:
            f.write(str(run_idx))
        drive.flush_and_unmount()
        drive.mount('/content/drive')

print(" ZigBee dataset generation complete.")
print(f"Saved {NUM_SAMPLES} files in: {npz_dir}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 ZigBee spectrograms will be saved in: /content/drive/MyDrive/zigbee_dataset/npz


  3%|▎         | 10/300 [02:18<3:19:39, 41.31s/it]

Mounted at /content/drive


  7%|▋         | 20/300 [04:37<3:11:58, 41.14s/it]

Mounted at /content/drive


 10%|█         | 30/300 [06:55<3:03:49, 40.85s/it]

Mounted at /content/drive


 13%|█▎        | 40/300 [09:13<2:58:10, 41.12s/it]

Mounted at /content/drive


 17%|█▋        | 50/300 [11:31<2:50:53, 41.02s/it]

Mounted at /content/drive


 20%|██        | 60/300 [13:50<2:44:57, 41.24s/it]

Mounted at /content/drive


 23%|██▎       | 70/300 [16:08<2:37:22, 41.05s/it]

Mounted at /content/drive


 27%|██▋       | 80/300 [18:24<2:28:38, 40.54s/it]

Mounted at /content/drive


 30%|███       | 90/300 [20:42<2:24:10, 41.19s/it]

Mounted at /content/drive


 33%|███▎      | 100/300 [23:00<2:16:48, 41.04s/it]

Mounted at /content/drive


 37%|███▋      | 110/300 [25:19<2:10:40, 41.27s/it]

Mounted at /content/drive


 40%|████      | 120/300 [27:39<2:04:59, 41.66s/it]

Mounted at /content/drive


 43%|████▎     | 130/300 [29:56<1:55:49, 40.88s/it]

Mounted at /content/drive


 47%|████▋     | 140/300 [32:14<1:49:05, 40.91s/it]

Mounted at /content/drive


 50%|█████     | 150/300 [34:33<1:42:54, 41.16s/it]

Mounted at /content/drive


 53%|█████▎    | 160/300 [36:51<1:35:17, 40.84s/it]

Mounted at /content/drive


 57%|█████▋    | 170/300 [39:09<1:28:48, 40.99s/it]

Mounted at /content/drive


 60%|██████    | 180/300 [41:27<1:21:58, 40.98s/it]

Mounted at /content/drive


 63%|██████▎   | 190/300 [43:46<1:15:41, 41.29s/it]

Mounted at /content/drive


 67%|██████▋   | 200/300 [46:04<1:08:32, 41.12s/it]

Mounted at /content/drive


 70%|███████   | 210/300 [48:20<1:00:51, 40.58s/it]

Mounted at /content/drive


 73%|███████▎  | 220/300 [50:40<55:37, 41.72s/it]

Mounted at /content/drive


 77%|███████▋  | 230/300 [52:56<47:09, 40.41s/it]

Mounted at /content/drive


 80%|████████  | 240/300 [55:12<40:32, 40.54s/it]

Mounted at /content/drive


 83%|████████▎ | 250/300 [57:28<33:41, 40.42s/it]

Mounted at /content/drive


 87%|████████▋ | 260/300 [59:44<27:01, 40.53s/it]

Mounted at /content/drive


 90%|█████████ | 270/300 [1:02:02<20:32, 41.08s/it]

Mounted at /content/drive


 93%|█████████▎| 280/300 [1:04:18<13:34, 40.72s/it]

Mounted at /content/drive


 97%|█████████▋| 290/300 [1:06:34<06:44, 40.48s/it]

Mounted at /content/drive


100%|██████████| 300/300 [1:08:50<00:00, 13.77s/it]

Mounted at /content/drive
 ZigBee dataset generation complete.
Saved 300 files in: /content/drive/MyDrive/zigbee_dataset/npz





In [None]:
import numpy as np
from scipy.signal import spectrogram
import os
from tqdm import tqdm

# ============================================================
# --- Configuration ---
# ============================================================
fs = 20e6                  # sample rate (Hz)
center_frequency = 2437e6  # centre of the observed band (Hz)
sim_time = 0.05            # capture length (s)
snr_db = 10
N_SAMPLES = 300  # fixed 300
num_samples = int(fs * sim_time)  # IQ samples per capture

# ------------------------------------------------------------
# Output location: <base_path>/wifi_zigbee_dataset/npz
# ------------------------------------------------------------
base_path = "/content/drive/MyDrive"
dataset_dir = os.path.join(base_path, "wifi_zigbee_dataset")
npz_dir = os.path.join(dataset_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)

# Skip generation if already complete
existing = [f for f in os.listdir(npz_dir) if f.endswith(".npz")]
if len(existing) >= N_SAMPLES:
    print(f"Dataset already complete with {len(existing)} files in: {npz_dir}")
else:
    print(f"Generating {N_SAMPLES - len(existing)} Wi-Fi + ZigBee spectrograms...")

    # ============================================================
    # --- WiFi OFDM Parameters ---
    # ============================================================
    N_FFT = 64   # length of the frequency-domain vector fed to the IFFT
    N_CP = 16    # number of tail samples copied in front as cyclic prefix
    # Occupied bins of the centered spectrum: 6..58, skipping bin 32.
    SC_INDICES = [k for k in range(6, 59) if k != 32]
    N_DATA_SC = len(SC_INDICES)
    WIFI_SYMBOL_SAMPLES = N_CP + N_FFT

    def create_ofdm_symbol():
        """Build one random QPSK-loaded OFDM symbol in the time domain.

        Returns the last ``N_CP`` IFFT output samples (cyclic prefix)
        followed by the full ``N_FFT``-sample IFFT output.
        """
        # Draw the I signs first, then the Q signs, from the global NumPy RNG.
        i_signs = np.random.choice([-1, 1], N_DATA_SC)
        q_signs = np.random.choice([-1, 1], N_DATA_SC)

        # Load unit-power QPSK points onto the occupied bins.
        spectrum = np.zeros(N_FFT, dtype=complex)
        spectrum[SC_INDICES] = (i_signs + 1j * q_signs) / np.sqrt(2)

        # Undo the centering so bin order matches what np.fft.ifft expects.
        body = np.fft.ifft(np.fft.ifftshift(spectrum))
        return np.concatenate([body[-N_CP:], body])

    def generate_wifi_packet(num_symbols):
        """Concatenate ``num_symbols`` freshly generated OFDM symbols.

        Returns a 1-D complex array. When ``num_symbols`` is zero or negative
        an empty complex array is returned (``np.concatenate`` would
        otherwise raise ``ValueError`` on the empty list).
        """
        if num_symbols <= 0:
            return np.zeros(0, dtype=complex)
        return np.concatenate([create_ofdm_symbol() for _ in range(num_symbols)])

    # ============================================================
    # --- ZigBee DSSS (OQPSK) ---
    # ============================================================
    # Chip sequence (c0..c31) of data symbol 0 in the IEEE 802.15.4 2.4 GHz
    # O-QPSK symbol-to-chip mapping. The previous table held only two
    # sequences, and neither matched the standard.
    _DSSS_SYMBOL0 = np.array([1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
                              0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0])
    # 1 at every odd chip index; XOR-ing with it inverts exactly those chips.
    _DSSS_ODD_CHIP_MASK = np.arange(32) % 2

    # Symbols 0..7 are symbol 0 cyclically right-shifted by 4 chips per step;
    # symbols 8..15 are the same sequences with the odd-indexed chips inverted.
    DSSS_TABLE = {}
    for _sym in range(8):
        _seq = np.roll(_DSSS_SYMBOL0, 4 * _sym)
        DSSS_TABLE[_sym] = _seq
        DSSS_TABLE[_sym + 8] = _seq ^ _DSSS_ODD_CHIP_MASK

    def zigbee_signal(num_symbols=100):
        """Return the flat chip stream for ``num_symbols`` random symbols.

        Each symbol is drawn uniformly from the keys of ``DSSS_TABLE`` and
        replaced by its chip sequence.
        """
        picked_sequences = [
            DSSS_TABLE[np.random.choice(list(DSSS_TABLE.keys()))]
            for _ in range(num_symbols)
        ]
        return np.array([chip for sequence in picked_sequences for chip in sequence])

    def oqpsk_modulate(chips, samples_per_chip=10):
        """Map a 0/1 chip stream to rectangular-pulse offset-QPSK samples.

        Even-indexed chips drive the I rail and odd-indexed chips the Q rail,
        each mapped to +/-1 and held for ``samples_per_chip`` samples. The Q
        rail is delayed by ``samples_per_chip // 2`` samples.

        Returns a 1-D complex array of ``len(chips) // 2 * samples_per_chip``
        samples (``chips`` is expected to have even length).
        """
        chips_pm = np.asarray(chips) * 2 - 1
        i_sig = np.repeat(chips_pm[0::2], samples_per_chip)
        q_sig = np.repeat(chips_pm[1::2], samples_per_chip)

        # Delay Q with zero fill rather than np.roll: a circular shift wraps
        # the tail of the last Q chip around to the very start of the burst.
        offset = samples_per_chip // 2
        if offset > 0:
            delayed_q = np.zeros_like(q_sig)
            delayed_q[offset:] = q_sig[:-offset]
            q_sig = delayed_q
        return i_sig + 1j * q_sig

    # ============================================================
    # --- Precompute Packets ---
    # ============================================================
    # Wi-Fi: enough OFDM symbols to cover the longest (2.5 ms) burst.
    longest_wifi_burst = 0.0025 * fs
    wifi_symbols_needed = int(np.ceil(longest_wifi_burst / WIFI_SYMBOL_SAMPLES))
    master_wifi_packet = generate_wifi_packet(wifi_symbols_needed)

    # ZigBee: one fixed-length frame, generated once and reused for every capture.
    zigbee_duration = 0.004064
    zigbee_samples_per_chip = int(fs / 2e6)
    # The modulator emits zigbee_samples_per_chip samples per pair of chips,
    # so twice as many chips as sample slots are needed.
    zigbee_chips_needed = int(
        np.ceil((zigbee_duration * fs * 2) / zigbee_samples_per_chip)
    )
    zigbee_symbols_needed = int(np.ceil(zigbee_chips_needed / 32))  # 32 chips per symbol
    master_zb_chips = zigbee_signal(zigbee_symbols_needed)
    master_zb_packet = oqpsk_modulate(master_zb_chips, zigbee_samples_per_chip)

    # Candidate Wi-Fi centre frequencies in Hz.
    wifi_channels = [2412e6, 2437e6, 2462e6]
    # IEEE 802.15.4 channel k (k = 11..26) is centred at 2405 + 5 * (k - 11) MHz,
    # i.e. 2405..2480 MHz. The previous expression omitted the "- 11" and
    # produced 2460..2535 MHz.
    zigbee_channels = 2405e6 + 5e6 * (np.arange(11, 27) - 11)
    # NOTE(review): a centre further than fs/2 from center_frequency cannot be
    # represented at this sample rate and will alias when mixed by
    # place_carrier. Confirm whether out-of-band channels should be excluded.

    def place_carrier(x, fs, f_sig, f_obs):
        """Frequency-shift ``x`` by ``f_sig - f_obs``.

        ``x`` is multiplied sample by sample with a complex exponential at
        the offset frequency, using a time axis that starts at 0 and is
        sampled at ``fs``.
        """
        offset_hz = f_sig - f_obs
        sample_times = np.arange(x.size) / fs
        mixer = np.exp(1j * 2 * np.pi * offset_hz * sample_times)
        return x * mixer

    # ============================================================
    # --- Main Generation Loop ---
    # ============================================================
    # Generate the remaining captures. The loop resumes at index len(existing).
    # NOTE(review): this assumes the files already present are exactly
    # wifi_zigbee_0000 .. wifi_zigbee_<len(existing)-1>; confirm, otherwise
    # gaps are never filled and existing indices may be overwritten.
    for i in tqdm(range(len(existing), N_SAMPLES), total=N_SAMPLES, initial=len(existing)):
        iq_clean = np.zeros(num_samples, dtype=complex)

        # --- Wi-Fi burst ---
        # Random 1.5-2.5 ms prefix of the master packet at a random offset,
        # scaled by a random gain of -5..+5 dB (amplitude, hence the /20) and
        # mixed to its channel relative to the observation centre.
        wifi_dur = np.random.uniform(0.0015, 0.0025)
        wifi_samples = int(wifi_dur * fs)
        wifi_start = np.random.randint(0, num_samples - wifi_samples)
        wifi_sig = master_wifi_packet[:wifi_samples]
        wifi_scale = 10 ** (np.random.uniform(-5, +5) / 20)
        wifi_center = np.random.choice(wifi_channels)
        wifi_sig = place_carrier(wifi_sig * wifi_scale, fs, wifi_center, center_frequency)
        iq_clean[wifi_start:wifi_start + wifi_samples] += wifi_sig

        # --- ZigBee burst ---
        # Fixed-length frame at a random offset, gain of -10..+3 dB. Added
        # with += so it can overlap the Wi-Fi burst in time.
        zb_samples = int(zigbee_duration * fs)
        zb_start = np.random.randint(0, num_samples - zb_samples)
        zb_sig = master_zb_packet[:zb_samples]
        zb_scale = 10 ** (np.random.uniform(-10, +3) / 20)
        zb_center = np.random.choice(zigbee_channels)
        zb_sig = place_carrier(zb_sig * zb_scale, fs, zb_center, center_frequency)
        iq_clean[zb_start:zb_start + zb_samples] += zb_sig

        # --- Add noise ---
        # The reference power is the mean over the WHOLE capture, silent
        # samples included, so the noise level varies with the random gains.
        # NOTE(review): this makes the in-burst SNR higher than snr_db;
        # confirm that is the intended definition of the stored snr_db.
        total_power = np.mean(np.abs(iq_clean)**2)
        snr_linear = 10 ** (snr_db / 10)
        noise_power = total_power / snr_linear
        noise_std = np.sqrt(noise_power / 2)
        noise = noise_std * (np.random.randn(num_samples) + 1j * np.random.randn(num_samples))
        iq_noisy = iq_clean + noise

        # --- Spectrogram ---
        # Two-sided spectrogram in dB (1e-12 floor avoids log10(0)), shifted
        # along the frequency axis and stored as float32.
        f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=512, noverlap=256, return_onesided=False)
        Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
        Sxx_dB = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

        # --- Save ---
        # The matrix is stored together with the parameters used to build it.
        np.savez_compressed(
            os.path.join(npz_dir, f"wifi_zigbee_{i:04d}.npz"),
            spectrogram_data=Sxx_dB,
            fs=fs,
            f_center=center_frequency,
            label="wifi+zigbee",
            wifi_center=wifi_center,
            zigbee_center=zb_center,
            wifi_scale=wifi_scale,
            zigbee_scale=zb_scale,
            snr_db=snr_db
        )

    print(f"Saved {N_SAMPLES} files in: {npz_dir}")

print(f"Spectrograms saved permanently to: {npz_dir}")

Generating 300 Wi-Fi + ZigBee spectrograms...


100%|██████████| 300/300 [03:40<00:00,  1.36it/s]

✅ Saved 300 files in: /content/drive/MyDrive/wifi_zigbee_dataset/npz
Spectrograms saved permanently to: /content/drive/MyDrive/wifi_zigbee_dataset/npz





In [None]:
import numpy as np
from scipy.signal import spectrogram, windows
import os
from tqdm import tqdm

# ============================================================
# --- Configuration ---
# ============================================================
# Capture geometry: sample rate, capture length and observed centre frequency.
fs = 20e6
sim_time = 0.05
num_samples = int(sim_time * fs)
center_frequency = 2437e6

# Target signal-to-noise ratio, in dB and as a linear power ratio.
snr_db = 10
snr_linear = 10 ** (snr_db / 10)

# Number of spectrogram files the finished dataset should contain.
N_SAMPLES = 300

# ============================================================
# --- Output location (Google Drive) ---
# ============================================================
base_path = "/content/drive/MyDrive"
dataset_dir = os.path.join(base_path, "bluetooth_zigbee_dataset")
npz_dir = os.path.join(dataset_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)

# --- Resume support: only the missing files are generated below ---
existing = list(filter(lambda name: name.endswith(".npz"), os.listdir(npz_dir)))
if len(existing) >= N_SAMPLES:
    print(f"Dataset already complete with {len(existing)} files in: {npz_dir}")
else:
    print(f"Generating {N_SAMPLES - len(existing)} Bluetooth + ZigBee spectrograms...")

    # ============================================================
    # --- Bluetooth GFSK ---
    # ============================================================
    def gfsk_modulate(num_symbols, samples_per_symbol, modulation_index=0.32):
        """Build a constant-envelope GFSK baseband burst from random bits.

        Args:
            num_symbols: number of random bits to draw.
            samples_per_symbol: number of samples each bit is held for.
            modulation_index: scales the per-sample phase increment.

        Returns:
            Complex array with unit magnitude. Its length is that of the held
            bit sequence (``num_symbols * samples_per_symbol``) whenever that is
            at least the kernel length, because ``np.convolve(mode='same')``
            keeps the longer of its two inputs.
        """
        # Random bits mapped to +/-1 and held for one symbol period each.
        levels = 2 * np.random.randint(0, 2, num_symbols) - 1
        held = np.repeat(levels, samples_per_symbol)

        # Gaussian smoothing kernel (4 symbols + 1 sample long), unit sum.
        kernel = windows.gaussian(4 * samples_per_symbol + 1, std=0.35 * samples_per_symbol)
        kernel /= np.sum(kernel)
        smoothed = np.convolve(held, kernel, mode='same')

        # Integrate the smoothed frequency pulse into phase.
        per_sample_step = (np.pi * modulation_index) / samples_per_symbol
        return np.exp(1j * np.cumsum(smoothed * per_sample_step))

    # Symbol timing.
    bt_symbol_rate = 1e6
    bt_samples_per_symbol = int(fs / bt_symbol_rate)

    # Hop timing: each hop slot lasts 1 / hop_rate seconds.
    hop_rate = 1600
    hop_duration_sec = 1.0 / hop_rate
    hop_duration_samples = int(hop_duration_sec * fs)
    bt_symbols_per_hop = int(hop_duration_sec * bt_symbol_rate)

    # Candidate carriers: 2402 MHz up to (not including) 2481 MHz, 1 MHz apart.
    bt_channels = np.arange(2402e6, 2481e6, 1e6)

    # Number of whole hop slots that fit into one capture.
    total_hop_slots = num_samples // hop_duration_samples

    # ============================================================
    # --- ZigBee OQPSK ---
    # ============================================================
    # Two 32-chip spreading sequences keyed by symbol value. The second entry is
    # the first one rotated right by one chip.
    DSSS_TABLE = {
        symbol: np.array([int(chip) for chip in pattern])
        for symbol, pattern in (
            (0x0, "1111100110001001" "1100110101001010"),
            (0x1, "0111110011000100" "1110011010100101"),
        )
    }

    def zigbee_signal(num_symbols=100):
        """Return the concatenated chips (0/1) of ``num_symbols`` random symbols.

        Each symbol is drawn uniformly from the keys of ``DSSS_TABLE`` and
        contributes that entry's chip sequence to the output.
        """
        symbol_values = list(DSSS_TABLE.keys())
        chip_stream = []
        for _ in range(num_symbols):
            chip_stream.extend(DSSS_TABLE[np.random.choice(symbol_values)])
        return np.array(chip_stream)

    def oqpsk_modulate(chips, samples_per_chip=10):
        """Map a 0/1 chip array onto a rectangular-pulse offset-QPSK waveform.

        Even-indexed chips drive the real part and odd-indexed chips the
        imaginary part; each is held for ``samples_per_chip`` samples. The
        imaginary part is delayed by half of that using ``np.roll``, which is
        circular, so its last samples wrap around to the front.
        """
        bipolar = 2 * chips - 1
        in_phase = np.repeat(bipolar[::2], samples_per_chip)
        quadrature = np.roll(np.repeat(bipolar[1::2], samples_per_chip), samples_per_chip // 2)
        return in_phase + 1j * quadrature

    # Chip timing and burst length.
    zigbee_chip_rate = 2e6
    zigbee_samples_per_chip = int(fs / zigbee_chip_rate)
    zigbee_duration = 0.004064

    # IEEE 802.15.4 2.4 GHz channels k = 11..26 are centred at
    # 2405 MHz + 5 MHz * (k - 11), i.e. 2405 ... 2480 MHz. The channel number
    # has to be offset by 11; multiplying by k directly placed every carrier at
    # 2460 ... 2535 MHz, well outside the real channel plan.
    zigbee_channels = 2405e6 + 5e6 * (np.arange(11, 27) - 11)

    # oqpsk_modulate() emits samples_per_chip samples per *pair* of chips (one
    # chip on I, one on Q), hence the factor of two when sizing the chip stream.
    zigbee_chips_needed = int(np.ceil((zigbee_duration * fs * 2) / zigbee_samples_per_chip))
    zigbee_symbols_needed = int(np.ceil(zigbee_chips_needed / 32))

    # One master packet is generated once and reused (re-scaled / re-tuned) for
    # every sample produced by the main loop.
    master_zb_chips = zigbee_signal(zigbee_symbols_needed)
    master_zb_packet = oqpsk_modulate(master_zb_chips, zigbee_samples_per_chip)[:int(zigbee_duration * fs)]

    def place_carrier(x, fs, f_sig, f_obs):
        """Frequency-shift ``x`` by ``f_sig - f_obs``.

        Args:
            x: complex baseband samples.
            fs: sample rate used to build the time axis.
            f_sig: absolute carrier frequency of the signal.
            f_obs: absolute centre frequency of the observed band.

        Returns:
            ``x`` multiplied by a complex exponential at the offset frequency.
        """
        offset_hz = f_sig - f_obs
        sample_times = np.arange(x.size) / fs
        return x * np.exp(1j * 2 * np.pi * offset_hz * sample_times)

    # ============================================================
    # --- Main Loop ---
    # ============================================================
    for run_idx in tqdm(range(len(existing), N_SAMPLES), total=N_SAMPLES, initial=len(existing)):
        iq_clean = np.zeros(num_samples, dtype=complex)
        signal_power_acc = 0
        num_visible_hops = 0
        cur_sample = 0

        # --- Bluetooth Hops ---
        for _ in range(total_hop_slots):
            f_bt_abs = np.random.choice(bt_channels)
            f_bt_rel = f_bt_abs - center_frequency
            if np.abs(f_bt_rel) < (fs / 2):
                gfsk = gfsk_modulate(bt_symbols_per_hop, bt_samples_per_symbol)
                t_hop = np.arange(hop_duration_samples) / fs
                carrier = np.exp(1j * 2 * np.pi * f_bt_rel * t_hop)
                hop_signal = gfsk * carrier
                power_scale_bt = 10 ** (np.random.uniform(-8, +5) / 20)
                hop_signal *= power_scale_bt
                iq_clean[cur_sample:cur_sample + hop_duration_samples] = hop_signal
                signal_power_acc += np.mean(np.abs(hop_signal)**2)
                num_visible_hops += 1
            cur_sample += hop_duration_samples

        # --- ZigBee Burst ---
        zb_samples = int(zigbee_duration * fs)
        zb_start = np.random.randint(0, num_samples - zb_samples)
        zb_sig = master_zb_packet[:zb_samples].copy()
        power_scale_zb = 10 ** (np.random.uniform(-10, +3) / 20)
        f_zb = np.random.choice(zigbee_channels)
        zb_sig = place_carrier(zb_sig * power_scale_zb, fs, f_zb, center_frequency)
        iq_clean[zb_start:zb_start + zb_samples] += zb_sig

        # --- Add noise ---
        total_power = np.mean(np.abs(iq_clean)**2)
        noise_power = total_power / snr_linear
        noise_std = np.sqrt(noise_power / 2)
        noise = noise_std * (np.random.randn(num_samples) + 1j * np.random.randn(num_samples))
        iq_noisy = iq_clean + noise

        # --- Spectrogram ---
        f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=512, noverlap=256, return_onesided=False)
        Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
        Sxx_dB = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

        # --- Save ---
        np.savez_compressed(
            os.path.join(npz_dir, f"bluetooth_zigbee_{run_idx:04d}.npz"),
            spectrogram_data=Sxx_dB,
            fs=fs,
            f_center=center_frequency,
            label="bluetooth+zigbee",
            bt_power_scale=power_scale_bt,
            zb_power_scale=power_scale_zb,
            bt_visible_hops=num_visible_hops,
            f_zigbee=f_zb,
            snr_db=snr_db
        )

    print(f"Saved {N_SAMPLES} files in: {npz_dir}")

print(f"Spectrograms saved permanently to: {npz_dir}")



Generating 300 Bluetooth + ZigBee spectrograms...


100%|██████████| 300/300 [03:48<00:00,  1.32it/s]

✅ Saved 300 files in: /content/drive/MyDrive/bluetooth_zigbee_dataset/npz
Spectrograms saved permanently to: /content/drive/MyDrive/bluetooth_zigbee_dataset/npz





In [None]:
import numpy as np
from scipy.signal import spectrogram, windows
import os
from tqdm import tqdm

# ============================================================
# --- Configuration ---
# ============================================================
fs = 20e6
snr_db = 10
sim_time = 0.05
num_samples = int(sim_time * fs)
center_frequency = 2437e6
snr_linear = 10 ** (snr_db / 10)
N_SAMPLES = 300

# ============================================================
# --- Paths ---
# ============================================================
base_path = "/content/drive/MyDrive"
dataset_dir = os.path.join(base_path, "wifi_bluetooth_zigbee_dataset")
npz_dir = os.path.join(dataset_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)

# --- Skip generation if already done ---
existing = [f for f in os.listdir(npz_dir) if f.endswith(".npz")]
if len(existing) >= N_SAMPLES:
    # Deliberately no exit() here: inside a notebook it shuts the kernel /
    # Colab runtime down and throws away all state needed by later cells.
    # Nothing has to be skipped explicitly - the generation loop below iterates
    # over range(len(existing), N_SAMPLES), which is empty in this case, and the
    # final line of the cell still reports the output folder.
    print(f"Dataset already complete with {len(existing)} files in: {npz_dir}")
else:
    print(f"Generating {N_SAMPLES - len(existing)} Wi-Fi + Bluetooth + ZigBee spectrograms...")

# ============================================================
# --- Wi-Fi (OFDM, 802.11a/g) ---
# ============================================================
# OFDM grid: 64-point FFT with a 16-sample cyclic prefix. On the centred grid
# the occupied bins are 6..58 with the middle bin (32) left empty.
N_FFT = 64
N_CP = 16
SC_INDICES = [k for k in range(6, 59) if k != 32]
N_DATA_SC = len(SC_INDICES)
WIFI_SYMBOL_SAMPLES = N_FFT + N_CP
wifi_channels = [2412e6, 2437e6, 2462e6]

def create_ofdm_symbol():
    """Return one time-domain OFDM symbol (cyclic prefix followed by the body).

    Every occupied subcarrier carries an independent random QPSK point of unit
    magnitude.
    """
    in_phase = np.random.choice([-1, 1], N_DATA_SC)
    quadrature = np.random.choice([-1, 1], N_DATA_SC)

    grid = np.zeros(N_FFT, dtype=complex)
    grid[SC_INDICES] = (in_phase + 1j * quadrature) / np.sqrt(2)

    # The grid is laid out centred, so undo the shift before the inverse FFT.
    body = np.fft.ifft(np.fft.ifftshift(grid))
    return np.concatenate([body[-N_CP:], body])

def generate_wifi_packet(num_symbols=100):
    """Concatenate ``num_symbols`` independently drawn OFDM symbols."""
    symbols = []
    for _ in range(num_symbols):
        symbols.append(create_ofdm_symbol())
    return np.concatenate(symbols)

# ============================================================
# --- Bluetooth (GFSK, 1 Msym/s, 1600 hops/s) ---
# ============================================================
def gfsk_modulate(num_symbols, samples_per_symbol, modulation_index=0.32):
    """Build a constant-envelope GFSK baseband burst from random bits.

    Args:
        num_symbols: number of random bits to draw.
        samples_per_symbol: number of samples each bit is held for.
        modulation_index: scales the per-sample phase increment.

    Returns:
        Complex array with unit magnitude. Its length is that of the held bit
        sequence (``num_symbols * samples_per_symbol``) whenever that is at
        least the kernel length, because ``np.convolve(mode="same")`` keeps the
        longer of its two inputs.
    """
    # Random bits mapped to +/-1 and held for one symbol period each.
    levels = 2 * np.random.randint(0, 2, num_symbols) - 1
    held = np.repeat(levels, samples_per_symbol)

    # Gaussian smoothing kernel (4 symbols + 1 sample long), unit sum.
    kernel = windows.gaussian(4 * samples_per_symbol + 1, std=0.35 * samples_per_symbol)
    kernel /= np.sum(kernel)
    smoothed = np.convolve(held, kernel, mode="same")

    # Integrate the smoothed frequency pulse into phase.
    per_sample_step = (np.pi * modulation_index) / samples_per_symbol
    return np.exp(1j * np.cumsum(smoothed * per_sample_step))

# Symbol timing.
bt_symbol_rate = 1e6
bt_samples_per_symbol = int(fs / bt_symbol_rate)

# Hop timing: each hop slot lasts 1 / hop_rate seconds.
hop_rate = 1600
hop_duration_sec = 1.0 / hop_rate
hop_duration_samples = int(hop_duration_sec * fs)
bt_symbols_per_hop = int(hop_duration_sec * bt_symbol_rate)

# Candidate carriers: 2402 MHz up to (not including) 2481 MHz, 1 MHz apart.
bt_channels = np.arange(2402e6, 2481e6, 1e6)

# Number of whole hop slots that fit into one capture.
total_hop_slots = num_samples // hop_duration_samples

# ============================================================
# --- ZigBee (OQPSK, 2 MHz) ---
# ============================================================
# Two 32-chip spreading sequences keyed by symbol value. The second entry is
# the first one rotated right by one chip.
DSSS_TABLE = {
    symbol: np.array([int(chip) for chip in pattern])
    for symbol, pattern in (
        (0x0, "1111100110001001" "1100110101001010"),
        (0x1, "0111110011000100" "1110011010100101"),
    )
}

def zigbee_signal(num_symbols=100):
    """Return the concatenated chips (0/1) of ``num_symbols`` random symbols.

    Each symbol is drawn uniformly from the keys of ``DSSS_TABLE`` and
    contributes that entry's chip sequence to the output.
    """
    symbol_values = list(DSSS_TABLE.keys())
    chip_stream = []
    for _ in range(num_symbols):
        chip_stream.extend(DSSS_TABLE[np.random.choice(symbol_values)])
    return np.array(chip_stream)

def oqpsk_modulate(chips, samples_per_chip=10):
    """Map a 0/1 chip array onto a rectangular-pulse offset-QPSK waveform.

    Even-indexed chips drive the real part and odd-indexed chips the imaginary
    part; each is held for ``samples_per_chip`` samples. The imaginary part is
    delayed by half of that using ``np.roll``, which is circular, so its last
    samples wrap around to the front.
    """
    bipolar = 2 * chips - 1
    in_phase = np.repeat(bipolar[::2], samples_per_chip)
    quadrature = np.roll(np.repeat(bipolar[1::2], samples_per_chip), samples_per_chip // 2)
    return in_phase + 1j * quadrature

# Chip timing and burst length.
zigbee_chip_rate = 2e6
zigbee_samples_per_chip = int(fs / zigbee_chip_rate)
zigbee_dur = 0.004064

# IEEE 802.15.4 2.4 GHz channels k = 11..26 are centred at
# 2405 MHz + 5 MHz * (k - 11), i.e. 2405 ... 2480 MHz. The channel number has
# to be offset by 11; multiplying by k directly placed every carrier at
# 2460 ... 2535 MHz, well outside the real channel plan.
zigbee_channels = 2405e6 + 5e6 * (np.arange(11, 27) - 11)

# oqpsk_modulate() emits samples_per_chip samples per *pair* of chips (one chip
# on I, one on Q), hence the factor of two when sizing the chip stream.
zigbee_chips_needed = int(np.ceil((zigbee_dur * fs * 2) / zigbee_samples_per_chip))
zigbee_symbols_needed = int(np.ceil(zigbee_chips_needed / 32))

# One master packet is generated once and reused (re-scaled / re-tuned) for
# every sample produced by the main loop.
master_zb_chips = zigbee_signal(zigbee_symbols_needed)
master_zb_packet = oqpsk_modulate(master_zb_chips, zigbee_samples_per_chip)[:int(zigbee_dur * fs)]

# ============================================================
# --- Helper Functions ---
# ============================================================
def place_carrier(x, fs, f_sig, f_obs):
    """Frequency-shift ``x`` by ``f_sig - f_obs``.

    Args:
        x: complex baseband samples.
        fs: sample rate used to build the time axis.
        f_sig: absolute carrier frequency of the signal.
        f_obs: absolute centre frequency of the observed band.

    Returns:
        ``x`` multiplied by a complex exponential at the offset frequency.
    """
    offset_hz = f_sig - f_obs
    sample_times = np.arange(x.size) / fs
    return x * np.exp(1j * 2 * np.pi * offset_hz * sample_times)

def safe_insert(buffer, signal, start_idx):
    """Add ``signal`` into ``buffer`` at ``start_idx``, truncating at the end.

    ``buffer`` is modified in place. Only the part of ``signal`` that fits
    before the end of ``buffer`` is added.

    Returns:
        Mean squared magnitude of the inserted part, or ``0`` when nothing fits.
    """
    n_fit = min(len(signal), len(buffer) - start_idx)
    if n_fit <= 0:
        return 0
    chunk = signal[:n_fit]
    buffer[start_idx:start_idx + n_fit] += chunk
    return np.mean(np.abs(chunk) ** 2)

# ============================================================
# --- Main Generation Loop ---
# ============================================================
# Loop-invariant values: every Bluetooth hop shares one time base and every
# ZigBee burst has the same length, so compute them once.
t_hop = np.arange(hop_duration_samples) / fs
zb_samples = int(zigbee_dur * fs)

# File indices resume at len(existing); this assumes the files already on disk
# are numbered contiguously from 0000.
for i in tqdm(range(len(existing), N_SAMPLES), total=N_SAMPLES, initial=len(existing)):
    iq_clean = np.zeros(num_samples, dtype=complex)
    # Reset per sample so the saved metadata can never carry over a value from
    # the previous file (or be undefined) when no hop lands in band.
    bt_scale = np.nan

    # --- Wi-Fi ---
    wifi_dur = np.random.uniform(0.0015, 0.0025)
    wifi_samples = int(wifi_dur * fs)
    wifi_start = np.random.randint(0, num_samples)
    wifi_scale = 10 ** (np.random.uniform(-5, +5) / 20)
    wifi_center = np.random.choice(wifi_channels)
    # Generate enough OFDM symbols to fill the drawn duration. A fixed packet of
    # 100 symbols is only 100 * WIFI_SYMBOL_SAMPLES samples, which is shorter
    # than every possible wifi_samples, so the random duration had no effect.
    wifi_symbols = -(-wifi_samples // WIFI_SYMBOL_SAMPLES)  # ceiling division
    wifi_sig = generate_wifi_packet(wifi_symbols)[:wifi_samples] * wifi_scale
    wifi_sig = place_carrier(wifi_sig, fs, wifi_center, center_frequency)
    safe_insert(iq_clean, wifi_sig, wifi_start)

    # --- Bluetooth ---
    cur_sample = 0
    for _ in range(total_hop_slots):
        f_bt_abs = np.random.choice(bt_channels)
        f_bt_rel = f_bt_abs - center_frequency
        # Only hops inside the observed +/- fs/2 band are synthesised.
        if np.abs(f_bt_rel) < (fs / 2):
            gfsk = gfsk_modulate(bt_symbols_per_hop, bt_samples_per_symbol)
            carrier = np.exp(1j * 2 * np.pi * f_bt_rel * t_hop)
            bt_scale = 10 ** (np.random.uniform(-8, +5) / 20)
            hop_signal = gfsk * carrier * bt_scale
            safe_insert(iq_clean, hop_signal, cur_sample)
        cur_sample += hop_duration_samples

    # --- ZigBee ---
    # NOTE(review): neither the Wi-Fi nor the ZigBee carrier is checked against
    # +/- fs/2, so out-of-band channels alias into the capture - confirm intended.
    zb_start = np.random.randint(0, num_samples)
    zb_scale = 10 ** (np.random.uniform(-10, +3) / 20)
    zb_center = np.random.choice(zigbee_channels)
    zb_sig = place_carrier(master_zb_packet[:zb_samples] * zb_scale, fs, zb_center, center_frequency)
    safe_insert(iq_clean, zb_sig, zb_start)

    # --- Add noise ---
    # Reference the noise to the mean power of the composite capture, exactly as
    # the other dataset generators in this notebook do. Summing the per-burst
    # mean powers (each averaged over a different duration) is not a power of
    # any signal and produced far more noise than the saved snr_db implies.
    total_power = np.mean(np.abs(iq_clean) ** 2)
    noise_power = total_power / snr_linear
    noise_std = np.sqrt(noise_power / 2)  # split equally between I and Q
    noise = noise_std * (np.random.randn(num_samples) + 1j * np.random.randn(num_samples))
    iq_noisy = iq_clean + noise

    # --- Spectrogram ---
    # Two-sided PSD in dB, fftshift-ed along the frequency axis.
    f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=512, noverlap=256, return_onesided=False)
    Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
    Sxx_dB = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

    # --- Save ---
    # bt_scale is the amplitude scale of the LAST in-band hop only (NaN if there
    # was none); each hop draws its own scale.
    np.savez_compressed(
        os.path.join(npz_dir, f"wifi_bt_zigbee_{i:04d}.npz"),
        spectrogram_data=Sxx_dB,
        fs=fs,
        f_center=center_frequency,
        label="wifi+bluetooth+zigbee",
        wifi_center=wifi_center,
        bt_scale=bt_scale,
        zb_center=zb_center,
        wifi_scale=wifi_scale,
        zb_scale=zb_scale,
        snr_db=snr_db
    )

print(f"Saved {N_SAMPLES} files in: {npz_dir}")
print(f"Spectrograms saved permanently to: {npz_dir}")



Generating 300 Wi-Fi + Bluetooth + ZigBee spectrograms...


100%|██████████| 300/300 [03:52<00:00,  1.29it/s]

✅ Saved 300 files in: /content/drive/MyDrive/wifi_bluetooth_zigbee_dataset/npz
Spectrograms saved permanently to: /content/drive/MyDrive/wifi_bluetooth_zigbee_dataset/npz





In [None]:
import numpy as np
from scipy.signal import spectrogram, windows, firwin
import os
from tqdm import tqdm

# ============================================================
# --- CONFIGURATION ---
# ============================================================
fs = 20e6
snr_db = 10
sim_time = 0.105
num_samples = int(sim_time * fs)
center_frequency = 2437e6
snr_linear = 10 ** (snr_db / 10)
N_SAMPLES = 300

# ============================================================
# --- PATHS ---
# ============================================================
base_path = "/content/drive/MyDrive"
dataset_dir = os.path.join(base_path, "wifi_bluetooth_dataset")
npz_dir = os.path.join(dataset_dir, "npz")
os.makedirs(npz_dir, exist_ok=True)

# --- Skip generation if already done ---
existing = [f for f in os.listdir(npz_dir) if f.endswith(".npz")]
if len(existing) >= N_SAMPLES:
    # Deliberately no exit() here: inside a notebook it shuts the kernel /
    # Colab runtime down and throws away all state needed by later cells.
    # Nothing has to be skipped explicitly - the generation loop below iterates
    # over range(len(existing), N_SAMPLES), which is empty in this case, and the
    # final line of the cell still reports the output folder.
    print(f"Dataset already complete with {len(existing)} files in: {npz_dir}")
else:
    print(f"Generating {N_SAMPLES - len(existing)} Wi-Fi + Bluetooth spectrograms...")

# ============================================================
# --- BLUETOOTH PARAMETERS ---
# ============================================================
# Symbol timing.
bt_symbol_rate = 1e6
bt_samples_per_symbol = int(fs / bt_symbol_rate)

# Hop timing: each hop slot lasts 1 / hop_rate seconds.
hop_rate = 1600
hop_duration_sec = 1.0 / hop_rate
hop_duration_samples = int(hop_duration_sec * fs)
bt_symbols_per_hop = int(hop_duration_sec * bt_symbol_rate)

# Candidate carriers: 2402 MHz up to (not including) 2481 MHz, 1 MHz apart.
bt_channels_abs = np.arange(2402e6, 2481e6, 1e6)

# Number of whole hop slots that fit into one capture.
total_hop_slots = num_samples // hop_duration_samples

# ============================================================
# --- WI-FI PARAMETERS ---
# ============================================================
# Low-pass FIR used to band-limit white noise; the cutoff is half of
# wifi_bandwidth, expressed as a fraction of the Nyquist frequency.
wifi_bandwidth = 10e6
nyquist = fs / 2.0
cutoff_norm = (wifi_bandwidth / 2.0) / nyquist
num_taps = 101
fir_taps = firwin(numtaps=num_taps, cutoff=cutoff_norm, window="hamming")

# Eleven carriers, 2412 MHz ... 2462 MHz in 5 MHz steps.
wifi_channels_abs = 2412e6 + 5e6 * np.arange(11)

# ============================================================
# --- FUNCTIONS ---
# ============================================================
def gfsk_modulate(num_symbols, samples_per_symbol, modulation_index=0.32):
    """Return a unit-magnitude complex GFSK baseband waveform of random bits.

    Random bits are mapped to +/-1, held for ``samples_per_symbol`` samples
    each, smoothed by a unit-sum Gaussian window and accumulated into phase.
    ``modulation_index`` scales the phase advance per sample.
    """
    # Pulse-shaping kernel: Gaussian window, 4 symbols + 1 sample long.
    shaping = windows.gaussian(4 * samples_per_symbol + 1, std=0.35 * samples_per_symbol)
    shaping /= np.sum(shaping)

    # +/-1 rectangular bit stream (the only random draw in this function).
    rect = np.repeat(2 * np.random.randint(0, 2, num_symbols) - 1, samples_per_symbol)
    freq_pulse = np.convolve(rect, shaping, mode="same")

    step = (np.pi * modulation_index) / samples_per_symbol
    accumulated_phase = np.cumsum(freq_pulse * step)
    return np.exp(1j * accumulated_phase)

def generate_bluetooth_signal():
    """Fill one capture with frequency-hopping GFSK bursts.

    For every hop slot a carrier is drawn from ``bt_channels_abs``. Slots whose
    carrier lies within +/- fs/2 of ``center_frequency`` receive a fresh GFSK
    burst shifted to that offset; all other slots stay zero.

    Returns:
        Complex array of length ``num_samples``.
    """
    capture = np.zeros(num_samples, dtype=complex)
    for slot in range(total_hop_slots):
        slot_start = slot * hop_duration_samples
        offset_hz = np.random.choice(bt_channels_abs) - center_frequency
        if np.abs(offset_hz) >= fs / 2:
            continue  # hop falls outside the observed band

        baseband = gfsk_modulate(bt_symbols_per_hop, bt_samples_per_symbol)
        times = np.arange(hop_duration_samples) / fs
        tone = np.exp(1j * 2 * np.pi * offset_hz * times)
        burst = baseband * tone

        # Clip the burst if the slot would run past the end of the capture.
        slot_stop = min(slot_start + hop_duration_samples, num_samples)
        capture[slot_start:slot_stop] = burst[:slot_stop - slot_start]
    return capture

def generate_wifi_burst():
    """Place one band-limited noise burst (a Wi-Fi stand-in) in an empty capture.

    Returns:
        ``(iq, carrier_hz)``: a complex array of length ``num_samples`` that is
        zero outside the burst, and the absolute carrier frequency drawn from
        ``wifi_channels_abs``.
    """
    # Random draws, in order: channel, duration, start position, noise.
    carrier_hz = np.random.choice(wifi_channels_abs)
    n_burst = int(np.random.uniform(0.001, 0.005) * fs)
    first = np.random.randint(0, num_samples - n_burst)
    white = (np.random.randn(n_burst) + 1j * np.random.randn(n_burst)) / np.sqrt(2)

    # Band-limit with the FIR low-pass, then shift to the carrier offset.
    shaped = np.convolve(white, fir_taps, mode="same")
    times = np.arange(n_burst) / fs
    shifted = shaped * np.exp(1j * 2 * np.pi * (carrier_hz - center_frequency) * times)

    capture = np.zeros(num_samples, dtype=complex)
    capture[first:first + n_burst] = shifted
    return capture, carrier_hz

# ============================================================
# --- MAIN GENERATION LOOP ---
# ============================================================
# Generate the missing samples. File indices resume at len(existing), which
# assumes the files already on disk are numbered contiguously from 0000.
for run_idx in tqdm(range(len(existing), N_SAMPLES), initial=len(existing), total=N_SAMPLES):
    # Draw the two component waveforms (Bluetooth first, then Wi-Fi).
    iq_bt = generate_bluetooth_signal()
    iq_wifi, wifi_freq = generate_wifi_burst()

    # --- Normalize & combine ---
    # Each component is scaled to (almost) unit mean power over the whole
    # capture; the 1e-12 term guards against division by zero for an all-zero
    # component. The random scales then set their relative amplitudes.
    bt_power = np.mean(np.abs(iq_bt)**2)
    wifi_power = np.mean(np.abs(iq_wifi)**2)
    iq_bt_norm = iq_bt / np.sqrt(bt_power + 1e-12)
    iq_wifi_norm = iq_wifi / np.sqrt(wifi_power + 1e-12)
    wifi_scale = np.random.uniform(1.2, 1.8)
    bt_scale = np.random.uniform(0.8, 1.2)
    iq_clean = (wifi_scale * iq_wifi_norm) + (bt_scale * iq_bt_norm)

    # --- Add AWGN ---
    # Noise power is set relative to the mean power of the combined capture and
    # split equally between the real and imaginary parts.
    sig_power = np.mean(np.abs(iq_clean)**2)
    noise_power = sig_power / snr_linear
    noise_std = np.sqrt(noise_power / 2)
    noise = noise_std * (np.random.randn(num_samples) + 1j * np.random.randn(num_samples))
    iq_noisy = iq_clean + noise

    # --- Spectrogram ---
    # Two-sided PSD in dB, fftshift-ed along the frequency axis. Note that this
    # cell uses nperseg=1024 / noverlap=512.
    f, t, Sxx = spectrogram(iq_noisy, fs=fs, nperseg=1024, noverlap=512, return_onesided=False)
    Sxx_dB = 10 * np.log10(np.abs(Sxx) + 1e-12)
    Sxx_dB = np.fft.fftshift(Sxx_dB, axes=0).astype(np.float32)

    # --- Save ---
    # One compressed .npz per sample: the spectrogram plus generation metadata.
    np.savez_compressed(
        os.path.join(npz_dir, f"wifi_bluetooth_{run_idx:04d}.npz"),
        spectrogram_data=Sxx_dB,
        fs=fs,
        f_center=center_frequency,
        wifi_freq=wifi_freq,
        wifi_scale=wifi_scale,
        bt_scale=bt_scale,
        snr_db=snr_db,
        label="wifi+bluetooth"
    )

print(f"Saved {N_SAMPLES} files in: {npz_dir}")
print(f"Spectrograms saved permanently to: {npz_dir}")


Generating 300 Wi-Fi + Bluetooth spectrograms...


100%|██████████| 300/300 [08:34<00:00,  1.72s/it]

✅ Saved 300 files in: /content/drive/MyDrive/wifi_bluetooth_dataset/npz
Spectrograms saved permanently to: /content/drive/MyDrive/wifi_bluetooth_dataset/npz





CNN Model 1 (Majority Signal Detector)

In [None]:
# ============================================================
# CNN-1: Majority Signal Detector (Hackathon-Ready Robust Version)
# ============================================================

import os, glob, random
import numpy as np
from PIL import Image
from typing import Dict
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

# ============================================================
# --- PATHS ---
# ============================================================
DATA_ROOT = "/content/drive/MyDrive"

# Dataset key -> folder (relative to DATA_ROOT) holding that dataset's .npz files.
FOLDERS = dict(
    wifi="wifi_dataset/npz",
    bt="bluetooth_dataset/npz",
    zb="zigbee_dataset/npz",
    wifi_bt="wifi_bluetooth_dataset/npz",
    zb_bt="bluetooth_zigbee_dataset/npz",
    wifi_zb="wifi_zigbee_dataset/npz",
    wifi_bt_zb="wifi_bluetooth_zigbee_dataset/npz",
)

# ============================================================
# --- CLASS MAP (7 dataset keys -> 3 majority classes)
# ============================================================
MAJORITY_MAP = dict(
    wifi=0, bt=1, zb=2,
    wifi_bt=0, zb_bt=2, wifi_zb=0, wifi_bt_zb=0,
)
IDX2NAME_3 = dict(enumerate(["wifi", "bluetooth", "zigbee"]))
NPZ_KEY = "spectrogram_data"

# ============================================================
# --- TRAINING CONFIG ---
# ============================================================
IMG_SIZE = 160
BATCH_SIZE = 64
EPOCHS = 30
LR = 3e-4
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
LABEL_SMOOTH = 0.15
PATIENCE = 6
NUM_WORKERS = 0
WEIGHT_DECAY = 1e-3
SEED = 1337

# Seed every RNG in use: Python, NumPy, and PyTorch (CPU and all CUDA devices).
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# ============================================================
# --- DATASET ---
# ============================================================
class SpectrogramDataset(Dataset):
    """Spectrogram ``.npz`` files gathered from several dataset folders.

    Args:
        root: base directory that every entry of ``folders`` is relative to.
        folders: mapping of dataset key -> sub-folder containing ``*.npz`` files.
        majormap: mapping of dataset key -> integer class index.
        img_size: side length the spectrogram is resized to.
        augment: when True, items go through the training transform pipeline,
            otherwise through the plain resize pipeline.

    Raises:
        FileNotFoundError: if one of the folders does not exist.
        RuntimeError: if no ``.npz`` file is found at all.

    Each item is ``(tensor, class_index, dataset_key)``.
    """

    def __init__(self, root, folders, majormap, img_size=160, augment=True):
        self.samples = []
        self.majormap = majormap
        self.img_size = img_size
        self.augment = augment

        for key, subpath in folders.items():
            folder_path = os.path.join(root, subpath)
            if not os.path.exists(folder_path):
                raise FileNotFoundError(f"Missing folder: {folder_path}")
            # glob returns files in filesystem order, which is not guaranteed to
            # be stable. Sort so that sample indices - and therefore the seeded
            # train/val/test split - are reproducible between runs.
            files = sorted(glob.glob(os.path.join(folder_path, "*.npz")))
            self.samples.extend((p, key) for p in files)
        if len(self.samples) == 0:
            raise RuntimeError("No .npz files found in dataset folders.")

        # Training pipeline: resize plus random flip / shift / blur / erasing.
        self.train_tf = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
            transforms.RandomApply([transforms.GaussianBlur(3)], p=0.3),
            transforms.ToTensor(),
            transforms.RandomErasing(p=0.4, scale=(0.02, 0.25), value=0),
        ])
        # Evaluation pipeline: deterministic resize only.
        self.eval_tf = transforms.Compose([
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
        ])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, key = self.samples[idx]
        # np.load on an .npz returns a lazily-read archive that keeps the file
        # open; the context manager closes it once the array has been copied
        # out, so file handles do not accumulate over an epoch.
        with np.load(path) as npz:
            arr = npz[NPZ_KEY].astype(np.float32)
        # Per-sample standardisation; the epsilon guards a constant image.
        arr = (arr - arr.mean()) / (arr.std() + 1e-6)
        im = Image.fromarray(arr).convert("F")
        t = self.train_tf(im) if self.augment else self.eval_tf(im)
        return t, self.majormap[key], key

# ============================================================
# --- SPLIT HELPERS ---
# ============================================================
def split_indices(n, val_ratio=0.1, test_ratio=0.1):
    """Randomly partition ``range(n)`` into train / validation / test index lists.

    The indices are shuffled with the global ``random`` module. The first
    ``int(n * test_ratio)`` shuffled indices form the test set, the next
    ``int(n * val_ratio)`` the validation set, and the remainder the training set.

    Returns:
        ``(train_indices, val_indices, test_indices)``.
    """
    order = list(range(n))
    random.shuffle(order)

    test_end = int(n * test_ratio)
    val_end = test_end + int(n * val_ratio)

    train_part = order[val_end:]
    val_part = order[test_end:val_end]
    test_part = order[:test_end]
    return train_part, val_part, test_part

class SubsetDataset(Dataset):
    """View onto a subset of ``base`` with its own augmentation setting.

    Args:
        base: dataset exposing a mutable ``augment`` attribute and ``__getitem__``.
        idxs: indices into ``base`` that make up this subset.
        aug: value ``base.augment`` is set to while an item of this subset is
            being fetched.

    The augmentation flag lives on the shared ``base`` object and is toggled
    around every fetch, so this is only safe when items are fetched one at a
    time within a process.
    """

    def __init__(self, base, idxs, aug=False):
        self.base, self.idxs, self.aug = base, idxs, aug

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, i):
        previous = self.base.augment
        self.base.augment = self.aug
        try:
            return self.base[self.idxs[i]]
        finally:
            # Always restore the shared flag, even if loading the item fails;
            # otherwise one bad file would leave ``base`` stuck in this
            # subset's augmentation mode for every other subset.
            self.base.augment = previous

# ============================================================
# --- MODEL ---
# ============================================================
class MobileNetV3Small3(nn.Module):
    """MobileNetV3-Small with a single-channel stem and a 3-way classifier head.

    Args:
        pretrained: load the ImageNet weights and initialise the new stem from
            the channel-average of the original stem kernels.
        dropout: dropout probability used inside the classifier head.
    """

    def __init__(self, pretrained=True, dropout=0.4):
        super().__init__()
        backbone = mobilenet_v3_small(
            weights=MobileNet_V3_Small_Weights.IMAGENET1K_V1 if pretrained else None
        )

        # Replace the stem convolution by a 1-input-channel one with the same
        # geometry (output channels, kernel, stride, padding).
        stem = backbone.features[0][0]
        mono_stem = nn.Conv2d(
            in_channels=1,
            out_channels=stem.out_channels,
            kernel_size=stem.kernel_size,
            stride=stem.stride,
            padding=stem.padding,
            bias=False,
        )
        if pretrained:
            # Collapse the pretrained kernels over their input-channel axis.
            with torch.no_grad():
                mono_stem.weight.copy_(stem.weight.mean(dim=1, keepdim=True))
        backbone.features[0][0] = mono_stem

        # New head; 576 is the feature width the backbone hands to the classifier.
        backbone.classifier = nn.Sequential(
            nn.Linear(576, 256),
            nn.Hardswish(),
            nn.Dropout(dropout),
            nn.Linear(256, 3),
        )

        self.m = backbone

    def forward(self, x):
        """Return the 3-class logits for a batch ``x``."""
        return self.m(x)

class LabelSmoothingCE(nn.Module):
    """Cross-entropy against a smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is shared equally among the other classes.
    """

    def __init__(self, smoothing=0.15):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, target):
        """Return the batch-mean loss for ``logits`` (N, C) and class indices ``target`` (N,)."""
        num_classes = logits.size(1)
        log_p = F.log_softmax(logits, dim=1)
        with torch.no_grad():
            # Start with the off-class probability everywhere, then overwrite
            # the true-class column entry of every row.
            soft_target = torch.full_like(log_p, self.smoothing / (num_classes - 1))
            soft_target.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        per_sample = -(soft_target * log_p).sum(dim=1)
        return per_sample.mean()

# ============================================================
# --- TRAIN / EVAL ---
# ============================================================
def train_epoch(model, loader, opt, criterion, device):
    """Run one training pass over ``loader``.

    Args:
        model: network to train (switched to train mode here).
        loader: iterable of ``(inputs, targets, key)`` batches; the key is ignored.
        opt: optimizer stepping the model parameters.
        criterion: loss taking ``(logits, targets)``.
        device: device the batches are moved to (string or ``torch.device``).

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    model.train()
    # torch.cuda.amp.autocast() is deprecated and always targets CUDA. Use the
    # device-agnostic torch.autocast and enable mixed precision only on CUDA so
    # CPU runs stay in full precision without warnings.
    device_type = torch.device(device).type
    use_amp = device_type == "cuda"
    # NOTE(review): no GradScaler is used, so fp16 gradients are not protected
    # against underflow when mixed precision is active.
    total, correct, loss_sum = 0, 0, 0
    for x, y, _ in loader:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        with torch.autocast(device_type=device_type, enabled=use_amp):
            logits = model(x)
            loss = criterion(logits, y)
        loss.backward()
        opt.step()
        # criterion returns a batch mean; weight it by batch size so the epoch
        # average is correct when the last batch is smaller.
        loss_sum += loss.item() * x.size(0)
        correct += (logits.argmax(1) == y).sum().item()
        total += x.size(0)
    return loss_sum/total, correct/total

@torch.no_grad()
def eval_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Returns:
        ``(mean_loss, accuracy, true_labels, predicted_labels)`` where the last
        two are NumPy arrays covering every sample in iteration order.
    """
    model.eval()
    n_seen = 0
    n_correct = 0
    weighted_loss = 0
    true_labels, pred_labels = [], []
    for inputs, targets, _ in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        logits = model(inputs)
        batch_size = inputs.size(0)
        # The criterion's batch mean is weighted by batch size so the final
        # average is per sample.
        weighted_loss += criterion(logits, targets).item() * batch_size
        predictions = logits.argmax(1)
        n_correct += (predictions == targets).sum().item()
        n_seen += batch_size
        true_labels.extend(targets.cpu().numpy())
        pred_labels.extend(predictions.cpu().numpy())
    return weighted_loss/n_seen, n_correct/n_seen, np.array(true_labels), np.array(pred_labels)

# ============================================================
# --- MAIN ---
# ============================================================
def main():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device:", device)

    base_ds = SpectrogramDataset(DATA_ROOT, FOLDERS, MAJORITY_MAP, img_size=IMG_SIZE, augment=True)
    n = len(base_ds)
    tr_idx, va_idx, te_idx = split_indices(n, VAL_SPLIT, TEST_SPLIT)
    tr_ds = SubsetDataset(base_ds, tr_idx, True)
    va_ds = SubsetDataset(base_ds, va_idx, False)
    te_ds = SubsetDataset(base_ds, te_idx, False)

    train_loader = DataLoader(tr_ds, BATCH_SIZE, True, num_workers=NUM_WORKERS)
    val_loader   = DataLoader(va_ds, BATCH_SIZE, False, num_workers=NUM_WORKERS)
    test_loader  = DataLoader(te_ds, BATCH_SIZE, False, num_workers=NUM_WORKERS)

    # --- Class weights (handles imbalance)
    y_train = [base_ds.majormap[key] for _, key in base_ds.samples]
    weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
    weights = torch.tensor(weights, dtype=torch.float32).to(device)
    print("Class weights:", weights.cpu().numpy())

    model = MobileNetV3Small3(pretrained=True, dropout=0.4).to(device)
    for p in model.m.features[:3].parameters():  # allow adaptation
        p.requires_grad = False

    criterion = nn.CrossEntropyLoss(weight=weights)
    opt = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                            lr=LR, weight_decay=WEIGHT_DECAY)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=EPOCHS)

    best_val, patience = float("inf"), PATIENCE
    best_path = "majority_mnv3s_best.pt"

    for ep in range(1, EPOCHS+1):
        tr_loss, tr_acc = train_epoch(model, train_loader, opt, criterion, device)
        va_loss, va_acc, _, _ = eval_epoch(model, val_loader, criterion, device)
        sched.step()
        print(f"Epoch {ep:02d}: train {tr_loss:.4f}/{tr_acc:.4f} | val {va_loss:.4f}/{va_acc:.4f}")
        if va_loss < best_val - 1e-4:
            best_val = va_loss
            torch.save(model.state_dict(), best_path)
            patience = PATIENCE
            print("  ↪ Saved best model.")
        else:
            patience -= 1
            if patience == 0:
                print("Early stopping.")
                break

    # --- TEST ---
    model.load_state_dict(torch.load(best_path, map_location=device))
    te_loss, te_acc, yt, yp = eval_epoch(model, test_loader, criterion, device)
    print(f"\nTest Loss {te_loss:.4f} | Acc {te_acc:.4f}")
    print(classification_report(yt, yp, target_names=[IDX2NAME_3[i] for i in range(3)], digits=4))
    print("Confusion Matrix:")
    print(confusion_matrix(yt, yp))

    # --- ONNX EXPORT ---
    !pip install -q onnx
    dummy = torch.randn(1,1,IMG_SIZE,IMG_SIZE).to(device)
    torch.onnx.export(model, dummy, "majority_mnv3s_best.onnx",
                      input_names=["spectrogram"], output_names=["logits"],
                      opset_version=12, do_constant_folding=True)
    print("\nExported best model:")
    print("  - PyTorch: majority_mnv3s_best.pt")
    print("  - ONNX:    majority_mnv3s_best.onnx")

if __name__ == "__main__":
    main()



Device: cuda
Class weights: [0.5833333 2.3333333 1.1666666]
Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth


100%|██████████| 9.83M/9.83M [00:00<00:00, 142MB/s]
  with torch.cuda.amp.autocast():


Epoch 01: train 0.5682/0.7518 | val 0.6534/0.8476
  ↪ Saved best model.
Epoch 02: train 0.0458/0.9887 | val 0.8519/0.7762
Epoch 03: train 0.0218/0.9940 | val 2.0653/0.6857
Epoch 04: train 0.0212/0.9940 | val 1.5512/0.7286
Epoch 05: train 0.0231/0.9940 | val 1.6645/0.7286
Epoch 06: train 0.0113/0.9952 | val 2.0097/0.7143
Epoch 07: train 0.0022/1.0000 | val 2.0614/0.7048
Early stopping.

Test Loss 0.6452 | Acc 0.8952
              precision    recall  f1-score   support

        wifi     0.9310    0.8852    0.9076       122
   bluetooth     1.0000    0.8800    0.9362        25
      zigbee     0.8056    0.9206    0.8593        63

    accuracy                         0.8952       210
   macro avg     0.9122    0.8953    0.9010       210
weighted avg     0.9016    0.8952    0.8965       210

Confusion Matrix:
[[108   0  14]
 [  3  22   0]
 [  5   0  58]]
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m119.3 MB/s[0m eta [36m0:00:00[0m
[?25h

  torch.onnx.export(model, dummy, "majority_mnv3s_best.onnx",



✅ Exported best model:
  - PyTorch: majority_mnv3s_best.pt
  - ONNX:    majority_mnv3s_best.onnx


In [None]:
# Flush and unmount the Google Drive mount (the next cell mounts it again).
from google.colab import drive
drive.flush_and_unmount()


In [None]:
# (Re)mount Google Drive at /content/drive; the dataset folders used by the
# CNN-2 cell below are resolved relative to /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


CNN Model 2 (Interference Detector)

In [None]:
# DataLoader knobs. NOTE: the CNN-2 cell below assigns both names again in its
# own CONFIG section with the same values, so this cell is redundant for it.
NUM_WORKERS = 0      # passed as DataLoader(num_workers=...); 0 = load in the main process
pin_memory = False   # passed as DataLoader(pin_memory=...)


In [None]:
# ============================================================
# CNN-2: Interference Detector (7 folders → 4 classes), OOD-hardened
# - Robust aug: SpecAugment (time/freq mask), RR-Crop, jitter, noise
# - MixUp, label smoothing, weight decay, grad clip, AMP
# - Class-balanced sampler
# - Cosine LR w/ warmup
# - Dynamic classification_report labels
# ============================================================

import os, glob, random, math
from typing import Dict, List, Tuple
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights

from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

# ---------------- CONFIG ----------------
# Root under which every dataset sub-folder in FOLDERS is resolved.
DATA_ROOT = "/content/drive/MyDrive"
# "Seven-class" source key -> folder (relative to DATA_ROOT) holding its .npz files.
FOLDERS = {
    "wifi":              "wifi_dataset/npz",
    "bt":                "bluetooth_dataset/npz",
    "zb":                "zigbee_dataset/npz",
    "wifi_bt":           "wifi_bluetooth_dataset/npz",
    "zb_bt":             "bluetooth_zigbee_dataset/npz",
    "wifi_zb":           "wifi_zigbee_dataset/npz",
    "wifi_bt_zb":        "wifi_bluetooth_zigbee_dataset/npz",
}

# Interference class name -> integer label predicted by the 4-way head.
INTERF_IDX = {"none": 0, "wifi": 1, "bt": 2, "zb": 3}

# Policy mapping: source folder key -> interference class name. Used whenever
# labels are not derived from per-file metadata (USE_METADATA_FOR_LABELS=False),
# and as the fallback when metadata lookup fails.
# NOTE(review): no entry maps to "wifi", so class index 1 never occurs as a
# policy label — confirm this is intended.
INTERF_MAP_POLICY = {
    "wifi": "none",
    "bt": "none",
    "zb": "none",
    "wifi_bt": "bt",
    "zb_bt": "bt",
    "wifi_zb": "zb",
    "wifi_bt_zb": "bt",
}

# If your .npz has *_scale metadata, set True to compute interference = 2nd strongest
USE_METADATA_FOR_LABELS = False

# Train config
IMG_SIZE = 160               # spectrograms are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 64
EPOCHS = 25                  # upper bound; early stopping may end training sooner
BASE_LR = 3e-4               # peak learning rate reached at the end of warmup
WARMUP_EPOCHS = 2            # linear warmup length before cosine decay
WEIGHT_DECAY = 1e-3          # AdamW weight decay
VAL_SPLIT, TEST_SPLIT = 0.1, 0.1   # fractions of the full sample list
LABEL_SMOOTH = 0.1           # smoothing mass used by LabelSmoothingCE
PATIENCE = 5                 # epochs without val-loss improvement before stopping
NUM_WORKERS = 0
SEED = 1337
# When True, predictions whose max softmax probability is below NONE_THRESH
# are forced to class 0 ("none") during evaluation.
APPLY_NONE_THRESHOLD = True
NONE_THRESH = 0.45
NPZ_KEY = "spectrogram_data"  # array name read from each .npz file
GRAD_CLIP_NORM = 1.0         # max gradient norm passed to clip_grad_norm_
MIXUP_ALPHA = 0.2    # set 0.0 to disable
USE_BALANCED_SAMPLER = True  # use WeightedRandomSampler for the training loader
pin_memory = False

# Aug toggles
AUG_RANDOM_RESIZED_CROP = True
AUG_FREQ_MASKS = 2          # SpecAugment: number of freq masks
AUG_TIME_MASKS = 2          # SpecAugment: number of time masks
AUG_FREQ_MASK_PCT = 0.12    # each freq mask covers this fraction of the rows
AUG_TIME_MASK_PCT = 0.12    # each time mask covers this fraction of the columns
AUG_JITTER_STD = 0.02       # add small gaussian noise to image

# ---------------- Seed ----------------
# Seed every RNG this cell draws from: Python `random` (split shuffle, flips),
# NumPy (crops, masks, jitter, MixUp lambda) and PyTorch CPU/CUDA generators.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ---------------- Aug helpers ----------------
def specaugment_np(arr: np.ndarray,
                   n_freq=2, n_time=2,
                   freq_mask_pct=0.12, time_mask_pct=0.12) -> np.ndarray:
    """Return a copy of a 2-D spectrogram with random bands zeroed out.

    Rows (axis 0) are treated as frequency and columns (axis 1) as time.
    ``n_freq`` horizontal bands are masked first, then ``n_time`` vertical
    bands. Each band spans ``pct * extent`` bins (at least one) and starts at
    a position drawn with ``np.random.randint``. The input is not modified.
    """
    n_rows, n_cols = arr.shape
    masked = arr.copy()
    # (axis, number of masks, fraction of that axis covered by one mask)
    plan = (
        (0, n_rows, n_freq, freq_mask_pct),
        (1, n_cols, n_time, time_mask_pct),
    )
    for axis, extent, n_masks, pct in plan:
        for _ in range(n_masks):
            span = max(1, int(pct * extent))
            start = np.random.randint(0, max(1, extent - span + 1))
            # Build a [rows, cols] index that selects the band on `axis`.
            selector = [slice(None), slice(None)]
            selector[axis] = slice(start, start + span)
            masked[tuple(selector)] = 0.0
    return masked

def random_resized_crop_np(arr: np.ndarray, out_h: int, out_w: int,
                           scale=(0.8, 1.0), ratio=(0.9, 1.1)) -> np.ndarray:
    """Crop a random window from a 2-D array and resize it to (out_h, out_w).

    Up to ten attempts are made to draw a window whose area is a
    ``scale``-fraction of the input and whose aspect ratio is sampled
    log-uniformly from ``ratio``. The first window that fits inside the input
    is cropped and bilinearly resized. If none fits, the whole array is
    resized instead.
    """
    src_h, src_w = arr.shape
    full_area = src_h * src_w
    target_size = (out_w, out_h)  # PIL expects (width, height)

    for _attempt in range(10):
        crop_area = full_area * np.random.uniform(*scale)
        log_bounds = (math.log(ratio[0]), math.log(ratio[1]))
        aspect = math.exp(np.random.uniform(*log_bounds))
        crop_h = int(round(math.sqrt(crop_area * aspect)))
        crop_w = int(round(math.sqrt(crop_area / aspect)))

        fits = 0 < crop_h <= src_h and 0 < crop_w <= src_w
        if not fits:
            continue

        top = np.random.randint(0, src_h - crop_h + 1)
        left = np.random.randint(0, src_w - crop_w + 1)
        window = arr[top:top + crop_h, left:left + crop_w]
        resized = Image.fromarray(window).resize(target_size, Image.BILINEAR)
        return np.array(resized)

    # Fallback: just resize
    whole = Image.fromarray(arr).resize(target_size, Image.BILINEAR)
    return np.array(whole)

# ---------------- Dataset ----------------
class InterfDataset(Dataset):
    """Spectrogram dataset over several ``.npz`` folders with 4-way interference labels.

    Every ``*.npz`` file found under ``root/<subpath>`` for each entry of
    ``folders`` becomes one sample, tagged with that entry's key (the
    "seven-class" source key). ``__getitem__`` returns ``(tensor, label, key)``
    where ``tensor`` has shape ``[1, img_size, img_size]`` and ``label`` is an
    index from ``INTERF_IDX``.

    Args:
        root: Directory that the ``folders`` sub-paths are relative to.
        folders: Mapping ``source key -> sub-path`` containing ``.npz`` files.
        interf_map_policy: Mapping ``source key -> interference class name``,
            used for labels unless metadata labelling is enabled and succeeds.
        img_size: Side length the spectrogram is resized to.
        augment: Whether ``__getitem__`` applies the training augmentations.

    Raises:
        FileNotFoundError: If one of the folders does not exist.
        RuntimeError: If no ``.npz`` file was found at all.
    """

    def __init__(self, root: str, folders: Dict[str, str],
                 interf_map_policy: Dict[str, str],
                 img_size=160, augment=True):
        self.samples: List[Tuple[str, str]] = []
        self.img_size = img_size
        self.augment = augment
        self.interf_map_policy = interf_map_policy

        for key, subpath in folders.items():
            folder = os.path.join(root, subpath)
            if not os.path.isdir(folder):
                raise FileNotFoundError(f"Missing folder: {folder}")
            # glob order is filesystem-dependent; sort so that the seeded
            # train/val/test split is the same on every run.
            for p in sorted(glob.glob(os.path.join(folder, "*.npz"))):
                self.samples.append((p, key))
        if not self.samples:
            raise RuntimeError("No .npz files found.")

    def __len__(self): return len(self.samples)

    def _np_to_tensor(self, arr: np.ndarray, train=True) -> torch.Tensor:
        """Normalise, resize and (if ``train``) augment a 2-D spectrogram.

        Returns a float32 tensor of shape ``[1, img_size, img_size]``.
        """
        arr = np.asarray(arr, dtype=np.float32)
        # Per-sample z-norm (robust to absolute dB ranges)
        m, s = arr.mean(), arr.std()
        if s < 1e-6: s = 1.0
        arr = (arr - m) / s

        # Honour the size given to the constructor rather than a module global.
        size = self.img_size

        # Random resized crop (keep global patterns but vary scale/position)
        if train and AUG_RANDOM_RESIZED_CROP:
            arr = random_resized_crop_np(arr, size, size, scale=(0.8, 1.0), ratio=(0.9, 1.1))
        else:
            arr = np.array(Image.fromarray(arr).resize((size, size), Image.BILINEAR))

        # SpecAugment masks
        if train and (AUG_FREQ_MASKS > 0 or AUG_TIME_MASKS > 0):
            arr = specaugment_np(arr, AUG_FREQ_MASKS, AUG_TIME_MASKS, AUG_FREQ_MASK_PCT, AUG_TIME_MASK_PCT)

        # Small Gaussian jitter
        if train and AUG_JITTER_STD > 0:
            arr = arr + np.random.randn(*arr.shape).astype(np.float32) * AUG_JITTER_STD

        # Occasional horizontal flip (time reversal)
        if train and random.random() < 0.5:
            arr = np.ascontiguousarray(arr[:, ::-1])

        t = torch.from_numpy(arr).unsqueeze(0)  # [1,H,W]
        return t

    def _infer_label_from_policy(self, seven_key: str) -> int:
        """Label from the fixed ``source key -> class name`` policy table."""
        return INTERF_IDX[self.interf_map_policy[seven_key]]

    def _infer_label_from_metadata(self, z, seven_key: str) -> int:
        """Label = the second-strongest signal according to ``*_scale`` entries.

        ``z`` is any mapping-like object supporting ``in`` and ``[]`` (an open
        ``NpzFile`` or a plain dict). Missing scales count as 0. If fewer than
        two signals have a non-negligible scale there is no interferer and
        the label is "none". ``seven_key`` is accepted for interface symmetry
        with the policy labeller and is not used.
        """
        def magnitude(*names) -> float:
            # First name present wins; avoids relying on NpzFile.get().
            for name in names:
                if name in z:
                    return abs(float(z[name]))
            return 0.0

        triples = [
            ("wifi", magnitude("wifi_scale")),
            ("bt", magnitude("bt_scale", "bluetooth_scale")),
            ("zb", magnitude("zb_scale", "zigbee_scale")),
        ]
        triples.sort(key=lambda kv: kv[1], reverse=True)
        # Covers both "no signal at all" and "a single signal": in either case
        # the runner-up has (near) zero power and must not be reported.
        if triples[1][1] <= 1e-9:
            return INTERF_IDX["none"]
        return INTERF_IDX[triples[1][0]]  # second-strongest

    def _resolve_label(self, z, key: str) -> int:
        """Pick the labelling strategy; metadata errors fall back to the policy."""
        if USE_METADATA_FOR_LABELS:
            try:
                return self._infer_label_from_metadata(z, key)
            except Exception:
                # Deliberate best-effort: unreadable/absent metadata is not fatal.
                return self._infer_label_from_policy(key)
        return self._infer_label_from_policy(key)

    def get_label(self, idx: int) -> int:
        """Label of sample ``idx`` without building the image tensor.

        With policy labels this touches no file at all, which makes it cheap
        enough to call for every training index (e.g. to build sampler weights).
        """
        path, key = self.samples[idx]
        if not USE_METADATA_FOR_LABELS:
            return int(self._infer_label_from_policy(key))
        with np.load(path, allow_pickle=True) as z:
            return int(self._resolve_label(z, key))

    def __getitem__(self, idx: int):
        path, key = self.samples[idx]
        # NOTE(security): allow_pickle=True executes pickled objects on load;
        # only use it on .npz files you generated yourself.
        # The context manager closes the underlying zip handle after reading.
        with np.load(path, allow_pickle=True) as z:
            arr = z[NPZ_KEY]
            y = self._resolve_label(z, key)
        x = self._np_to_tensor(arr, train=self.augment)
        return x, int(y), key

# ---------------- Split + Sampler ----------------
def split_indices(n, val_ratio=0.1, test_ratio=0.1):
    """Shuffle ``range(n)`` and cut it into (train, val, test) index lists.

    The shuffle uses Python's global ``random`` state. The first
    ``int(n * test_ratio)`` shuffled indices form the test set, the next
    ``int(n * val_ratio)`` the validation set, and the remainder the train set.
    """
    order = list(range(n))
    random.shuffle(order)

    test_end = int(n * test_ratio)
    val_end = test_end + int(n * val_ratio)

    test_part = order[:test_end]
    val_part = order[test_end:val_end]
    train_part = order[val_end:]
    return train_part, val_part, test_part

class SubsetDataset(Dataset):
    """View onto a subset of ``base`` with its own augmentation setting.

    Several subsets (train/val/test) share one base dataset, so the base's
    ``augment`` flag is switched to this subset's ``aug`` value only for the
    duration of a single item fetch and then put back.

    Args:
        base: Dataset exposing a mutable ``augment`` attribute.
        idxs: Indices into ``base`` that make up this subset.
        aug: Augmentation flag to apply while fetching from ``base``.
    """

    def __init__(self, base: Dataset, idxs: List[int], aug: bool):
        self.base, self.idxs, self.aug = base, idxs, aug

    def __len__(self): return len(self.idxs)

    def __getitem__(self, i):
        previous = self.base.augment
        self.base.augment = self.aug
        try:
            return self.base[self.idxs[i]]
        finally:
            # Restore even when the fetch raises (e.g. a corrupt file);
            # otherwise the shared base would stay in this subset's mode.
            self.base.augment = previous

def make_weights_for_balanced_sampler(ds: Dataset, idxs: List[int]) -> List[float]:
    """Per-sample weights that are inversely proportional to class frequency.

    Args:
        ds: Dataset whose items are ``(x, label, ...)`` tuples. If it also
            offers a ``get_label(index)`` method, that is used instead so that
            no sample has to be loaded or augmented just to read its label.
        idxs: Indices (into ``ds``) of the samples to weight.

    Returns:
        One weight per entry of ``idxs`` (same order), equal to
        ``1 / count(label)`` among those entries. Any integer label is
        accepted; the number of classes is not fixed.
    """
    from collections import Counter

    label_of = getattr(ds, "get_label", None)
    if callable(label_of):
        labels = [int(label_of(i)) for i in idxs]
    else:
        # Generic fallback: fetch the full item and keep only its label.
        labels = [int(ds[i][1]) for i in idxs]

    counts = Counter(labels)
    return [1.0 / counts[y] for y in labels]

# ---------------- Model ----------------
class MobileNetV3Small4(nn.Module):
    """MobileNetV3-Small backbone with a 1-channel stem and a 4-way head.

    The backbone's first convolution is replaced by a single-input-channel
    convolution of the same geometry. With ``pretrained=True`` its weights
    are the channel-mean of the ImageNet stem weights. The backbone's last
    classifier layer becomes an identity and a separate
    ``Dropout -> Linear(·, 4)`` head produces the logits.

    Args:
        pretrained: Load ImageNet (IMAGENET1K_V1) weights for the backbone.
        dropout_p: Dropout probability applied before the final linear layer.
    """

    def __init__(self, pretrained=True, dropout_p=0.2):
        super().__init__()
        backbone = mobilenet_v3_small(
            weights=MobileNet_V3_Small_Weights.IMAGENET1K_V1 if pretrained else None
        )

        # Swap the RGB stem for a mono stem with identical kernel/stride/padding.
        rgb_stem = backbone.features[0][0]
        mono_stem = nn.Conv2d(
            1,
            rgb_stem.out_channels,
            kernel_size=rgb_stem.kernel_size,
            stride=rgb_stem.stride,
            padding=rgb_stem.padding,
            bias=False,
        )
        if pretrained:
            with torch.no_grad():
                # Average the three input-channel kernels into one.
                mono_stem.weight.copy_(rgb_stem.weight.mean(1, keepdim=True))
        backbone.features[0][0] = mono_stem

        # Strip the original output layer; the 4-way head lives in self.head.
        feature_dim = backbone.classifier[-1].in_features
        backbone.classifier[-1] = nn.Identity()

        self.m = backbone
        self.head = nn.Sequential(
            nn.Dropout(p=dropout_p),
            nn.Linear(feature_dim, 4),
        )

    def forward(self, x):
        return self.head(self.m(x))

class LabelSmoothingCE(nn.Module):
    """Cross-entropy against a smoothed one-hot target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is shared equally among the other ``n_class - 1``
    classes. The loss is averaged over the batch.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, target):
        log_p = F.log_softmax(logits, dim=1)
        num_classes = logits.size(1)
        with torch.no_grad():
            off_value = self.smoothing / (num_classes - 1)
            soft_target = torch.full_like(log_p, off_value)
            soft_target.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        per_sample = (-soft_target * log_p).sum(dim=1)
        return per_sample.mean()

def apply_none_threshold(logits: torch.Tensor, thr: float):
    """Argmax prediction, overridden to class 0 when confidence is below ``thr``.

    Confidence is the largest softmax probability of each row of ``logits``.
    Returns a 1-D tensor of predicted class indices.
    """
    confidence, winner = F.softmax(logits, dim=1).max(1)
    fallback = torch.zeros_like(winner)  # class 0 for every low-confidence row
    return torch.where(confidence < thr, fallback, winner)

# ---------------- MixUp ----------------
def mixup_data(x, y, alpha=MIXUP_ALPHA):
    """Blend each sample of a batch with a randomly chosen partner (MixUp).

    Returns ``(mixed_x, y_a, lam, y_b)`` where ``mixed_x`` is
    ``lam * x + (1 - lam) * x[perm]``, ``y_a`` are the original targets and
    ``y_b`` the partners' targets. ``lam`` is drawn from ``Beta(alpha, alpha)``.
    With ``alpha <= 0`` the batch is returned unchanged with ``lam = 1.0``.
    """
    if alpha <= 0.0:  # disabled
        return x, y, 1.0, y

    lam = np.random.beta(alpha, alpha)
    partner = torch.randperm(x.size(0), device=x.device)
    blended = lam * x + (1 - lam) * x[partner]
    return blended, y, lam, y[partner]

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Loss for a MixUp batch: ``lam``-weighted blend of the loss on each target set."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

# ---------------- Train / Eval ----------------
def cosine_lr_with_warmup(optimizer, epoch, base_lr, total_epochs, warmup_epochs):
    """Set and return the learning rate for a 0-based ``epoch``.

    During the first ``warmup_epochs`` epochs the rate rises linearly to
    ``base_lr``. Afterwards it follows half a cosine from ``base_lr`` toward
    zero over the remaining epochs. The value is written into every parameter
    group of ``optimizer``.
    """
    in_warmup = epoch < warmup_epochs
    if in_warmup:
        lr = base_lr * float(epoch + 1) / float(max(1, warmup_epochs))
    else:
        decay_span = float(max(1, total_epochs - warmup_epochs))
        progress = float(epoch - warmup_epochs) / decay_span
        lr = 0.5 * base_lr * (1 + math.cos(math.pi * progress))

    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr

def train_epoch(model, loader, optimizer, criterion, device, scaler: torch.cuda.amp.GradScaler):
    """Run one training epoch with MixUp, AMP and gradient clipping.

    Args:
        model: Network to train (put into train mode here).
        loader: Yields ``(x, y, key)`` batches; the key is ignored.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        criterion: Loss taking ``(logits, targets)``.
        device: Device the batches are moved to.
        scaler: AMP gradient scaler (may be disabled, e.g. on CPU).

    Returns:
        ``(mean_loss, accuracy_proxy)`` averaged over all seen samples. The
        accuracy compares the argmax on the *mixed* inputs with the unmixed
        targets, so it is only a rough indicator when MixUp is active.
    """
    model.train()
    total, correct, loss_sum = 0, 0, 0.0
    for x, y, _ in loader:
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)

        # MixUp
        x_mix, y_a, lam, y_b = mixup_data(x, y, MIXUP_ALPHA)

        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast('cuda'):
            logits = model(x_mix)
            loss = mixup_criterion(criterion, logits, y_a, y_b, lam)

        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale at this point.
        # Unscale them first so GRAD_CLIP_NORM applies to the true gradient
        # norm; scaler.step() knows they were already unscaled.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP_NORM)
        scaler.step(optimizer)
        scaler.update()

        loss_sum += loss.item() * x.size(0)
        # compute mixup accuracy proxy (argmax vs y)
        with torch.no_grad():
            pred = logits.argmax(1)
            correct += ((pred == y).float().mean().item() * x.size(0))  # proxy
        total += x.size(0)
    return loss_sum/total, correct/total

@torch.no_grad()
def eval_epoch(model, loader, criterion, device, apply_thresh=False, thr=0.45):
    """Evaluate ``model`` over ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate (put into eval mode here).
        loader: Yields ``(x, y, keys)`` batches.
        criterion: Loss taking ``(logits, targets)``.
        device: Device the batches are moved to.
        apply_thresh: If true, predictions come from ``apply_none_threshold``
            with ``thr``; otherwise plain argmax is used.
        thr: Confidence threshold forwarded to ``apply_none_threshold``.

    Returns:
        ``(mean_loss, accuracy, y_true, y_pred, keys)`` where the two label
        collections are NumPy arrays and ``keys`` is a flat list.
    """
    model.eval()
    n_seen = 0
    n_correct = 0
    weighted_loss = 0.0
    true_labels, pred_labels, source_keys = [], [], []

    for inputs, targets, batch_keys in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        with torch.amp.autocast('cuda'):
            logits = model(inputs)
            batch_loss = criterion(logits, targets)

        batch_size = inputs.size(0)
        weighted_loss += batch_loss.item() * batch_size

        if apply_thresh:
            pred = apply_none_threshold(logits, thr)
        else:
            pred = logits.argmax(1)

        n_correct += (pred == targets).sum().item()
        n_seen += batch_size

        true_labels.extend(targets.cpu().numpy())
        pred_labels.extend(pred.cpu().numpy())
        source_keys.extend(batch_keys)

    mean_loss = weighted_loss / n_seen
    accuracy = n_correct / n_seen
    return mean_loss, accuracy, np.array(true_labels), np.array(pred_labels), source_keys

def make_7x4_contingency(keys: List[str], preds: np.ndarray):
    """Count predictions per source folder key.

    Returns a dict with one row per source key (fixed order) holding four
    counters, one per predicted class index 0..3. Pairs whose key is unknown
    or whose prediction lies outside 0..3 are ignored.
    """
    row_names = ("wifi", "bt", "zb", "wifi_bt", "zb_bt", "wifi_zb", "wifi_bt_zb")
    table = {}
    for name in row_names:
        table[name] = [0, 0, 0, 0]

    for source_key, predicted in zip(keys, preds):
        if source_key not in table:
            continue
        if not (0 <= predicted < 4):
            continue
        table[source_key][predicted] += 1
    return table

def print_contingency(table):
    """Print a contingency table as fixed-width text.

    ``table`` maps a row name to a sequence of counts. The row name is
    left-aligned in 18 characters and every count is right-aligned in 12,
    under a header naming the four predicted classes.
    """
    label_width, cell_width = 18, 12
    column_titles = ("pred_none", "pred_wifi", "pred_bt", "pred_zb")

    print("\nPer-input-class 7x4 contingency (rows=true, cols=pred):")

    header = "seven_class".ljust(label_width)
    for title in column_titles:
        header += title.rjust(cell_width)
    print(header)

    for row_name in table:
        cells = "".join(str(count).rjust(cell_width) for count in table[row_name])
        print(row_name.ljust(label_width) + cells)

# ---------------- Main ----------------
def main():
    """Train, evaluate and export the 4-class interference detector.

    Steps, in order: build the dataset and a shuffled train/val/test split,
    create the data loaders (optionally class-balanced for training), train
    with warmup + cosine learning rate and early stopping on validation loss,
    reload the best checkpoint, report test metrics, and try to export the
    model to ONNX. Writes ``interf_mnv3s_best.pt`` (and, if export succeeds,
    ``interf_mnv3s_best.onnx``) into the current working directory.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Device:", device)

    # One shared base dataset; each SubsetDataset sets the augmentation flag
    # it needs (train=True, val/test=False) while fetching an item.
    base_ds = InterfDataset(DATA_ROOT, FOLDERS, INTERF_MAP_POLICY, img_size=IMG_SIZE, augment=True)
    n = len(base_ds)
    tr_idx, va_idx, te_idx = split_indices(n, VAL_SPLIT, TEST_SPLIT)
    tr_ds = SubsetDataset(base_ds, tr_idx, True)
    va_ds = SubsetDataset(base_ds, va_idx, False)
    te_ds = SubsetDataset(base_ds, te_idx, False)

    # Balanced sampler for TRAIN
    if USE_BALANCED_SAMPLER:
        # Weights are aligned with tr_idx order, which is also tr_ds order.
        weights = make_weights_for_balanced_sampler(base_ds, tr_idx)
        sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
        train_loader = DataLoader(tr_ds, batch_size=BATCH_SIZE, sampler=sampler,
                                  num_workers=NUM_WORKERS, pin_memory=pin_memory, drop_last=True)
    else:
        train_loader = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True,
                                  num_workers=NUM_WORKERS, pin_memory=pin_memory, drop_last=True)

    val_loader = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False,
                            num_workers=NUM_WORKERS, pin_memory=pin_memory)
    test_loader = DataLoader(te_ds, batch_size=BATCH_SIZE, shuffle=False,
                            num_workers=NUM_WORKERS, pin_memory=pin_memory)

    model = MobileNetV3Small4(pretrained=True, dropout_p=0.25).to(device)
    # Freeze earlier layers a bit, fine-tune later ones
    for p in model.m.features[:5].parameters():
        p.requires_grad = False

    criterion = LabelSmoothingCE(LABEL_SMOOTH).to(device)
    # Only parameters left trainable above are handed to the optimizer.
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                  lr=BASE_LR, weight_decay=WEIGHT_DECAY)
    # Loss scaling is only enabled on CUDA; on CPU the scaler is a pass-through.
    scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))

    # `patience` counts down the remaining epochs allowed without improvement.
    best_val, patience = float("inf"), PATIENCE
    best_path = "interf_mnv3s_best.pt"

    for ep in range(1, EPOCHS+1):
        # The schedule is 0-based, the loop counter 1-based.
        lr_now = cosine_lr_with_warmup(optimizer, ep-1, BASE_LR, EPOCHS, WARMUP_EPOCHS)

        tr_loss, tr_acc = train_epoch(model, train_loader, optimizer, criterion, device, scaler)
        va_loss, va_acc, _, _, _ = eval_epoch(model, val_loader, criterion, device,
                                              apply_thresh=APPLY_NONE_THRESHOLD, thr=NONE_THRESH)

        print(f"Epoch {ep:02d} | lr {lr_now:.2e} | train {tr_loss:.4f}/{tr_acc:.4f} | val {va_loss:.4f}/{va_acc:.4f}")

        # Checkpoint on validation LOSS (with a small margin), not accuracy.
        if va_loss < best_val - 1e-4:
            best_val = va_loss
            torch.save(model.state_dict(), best_path)
            patience = PATIENCE
            print("  ↪ saved best")
        else:
            patience -= 1
            if patience == 0:
                print("Early stopping.")
                break

    # --- TEST ---
    # Evaluate the best checkpoint, not the weights of the last epoch.
    model.load_state_dict(torch.load(best_path, map_location=device))
    te_loss, te_acc, yt, yp, kt = eval_epoch(model, test_loader, criterion, device,
                                             apply_thresh=APPLY_NONE_THRESHOLD, thr=NONE_THRESH)
    print(f"\nTest Loss {te_loss:.4f} | Acc {te_acc:.4f}")

    # Restrict the report to labels that actually occur in truth or prediction
    # so target_names always matches the label list.
    unique_labels = sorted(np.unique(np.concatenate([yt, yp])))
    idx2name = {v:k for k,v in INTERF_IDX.items()}
    label_names = [idx2name[i] for i in unique_labels]
    print("\nClassification report (present classes only):")
    print(classification_report(yt, yp, labels=unique_labels, target_names=label_names, digits=4))

    # The confusion matrix always shows all four classes, present or not.
    cm = confusion_matrix(yt, yp, labels=[0,1,2,3])
    print("\nConfusion Matrix (4x4) [rows=true, cols=pred]:")
    print(cm)

    # Break predictions down by the source folder each test sample came from.
    cont = make_7x4_contingency(kt, yp)
    print_contingency(cont)

    # --- ONNX ---
    # Best-effort: a missing `onnx` package or an export failure is reported
    # and skipped; the PyTorch checkpoint has already been written above.
    try:
        import onnx  # noqa: F401
        dummy = torch.randn(1,1,IMG_SIZE,IMG_SIZE).to(device)
        torch.onnx.export(model, dummy, "interf_mnv3s_best.onnx",
                          input_names=["spectrogram"], output_names=["logits"],
                          opset_version=12, do_constant_folding=True)
        print("\nExported ONNX: interf_mnv3s_best.onnx")
    except Exception as e:
        print(f"\n(ONNX export skipped: {e})")
    print("Saved PyTorch: interf_mnv3s_best.pt")

# Run the full train/eval/export pipeline when this cell (or script) is executed.
if __name__ == "__main__":
    main()



SyntaxError: invalid syntax (ipython-input-367915219.py, line 183)