In [1]:
!pip install torch

Collecting torch
  Downloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)


In [2]:
import pandas as pd
import numpy as np
import os, glob, random, copy
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm # Use tqdm.notebook for Jupyter progress bars

# --- PyTorch Imports for ML Model ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

In [3]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Determine if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [19]:
# --- ML Model Configuration ---
# WINDOW_SIZE = 150       # Number of time steps (data points) in each input sequence/window 
WINDOW_SIZE = 1050       # 300 = 1 min 1050 = 3.5 min
STRIDE = WINDOW_SIZE #5             # How many steps to move the window (1 means maximum overlap, covering every point)

LR = 0.005  #1e-3
EPOCHS = 50            # Number of training epochs for the autoencoder
BATCH_SIZE = 256        # 128 Batch size for training
EARLY_STOP_PATIENCE = 15
DECISION_THR = 0.5 


# --- Post-processing configuration (applied to ML predictions) ---
MERGE_GAP_SECONDS = 6             # Maximum time gap between predicted wake segments to merge them
MIN_WAKE_DURATION_SECONDS = 0.5     # Minimum duration for a detected event to be considered a wake

# --- Paths ---
base_data_dir = "processed_ts"
output_base_dir_ml = "detected_wakes_CLS"
plots_output_dir_ml = os.path.join(output_base_dir_ml, "wake_plots/LSTM", f"w{WINDOW_SIZE},s{STRIDE}")
os.makedirs(plots_output_dir_ml, exist_ok=True)

In [20]:
def load_and_preprocess_data(file_path):
    """
    Loads the CSV data and performs basic preprocessing.
    Ensures 't_s' is sorted and resets index.
    """
    try:
        df = pd.read_csv(file_path)
        df = df.sort_values(by='t_s').reset_index(drop=True)
        return df
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None
    except Exception as e:
        print(f"Error loading or preprocessing data from {file_path}: {e}")
        return None

In [21]:
def get_ground_truth_wakes(df):
    """
    Extracts ground truth wake intervals from the 'wake_label' column.
    Returns a list of (start_time, end_time) tuples.
    """
    ground_truth_wakes = []
    in_wake = False
    wake_start_time = None

    for i, row in df.iterrows():
        t_s = row['t_s']
        wake_label = row['wake_label']

        if wake_label == 1 and not in_wake:
            wake_start_time = t_s
            in_wake = True
        elif wake_label == 0 and in_wake:
            wake_end_time = t_s
            ground_truth_wakes.append((wake_start_time, wake_end_time))
            in_wake = False
    
    # Handle case where wake extends to the end of the time series
    if in_wake:
        ground_truth_wakes.append((wake_start_time, df['t_s'].iloc[-1]))

    return ground_truth_wakes

In [22]:
def calculate_iou(interval1, interval2):
    """
    Calculates the Intersection over Union (IoU) of two time intervals.
    Intervals are (start, end) tuples.
    """
    start1, end1 = interval1
    start2, end2 = interval2

    # Calculate intersection duration
    intersection_start = max(start1, start2)
    intersection_end = min(end1, end2)
    
    intersection_duration = max(0, intersection_end - intersection_start)

    # Calculate union duration
    union_duration = (max(end1, end2) - min(start1, start2))

    if union_duration == 0:
        return 0.0 # No union means no overlap possible
    
    return intersection_duration / union_duration

In [23]:
def evaluate_detection(ground_truth_wakes, predicted_wakes, iou_threshold=0.5):
    """
    Evaluates the detection performance based on IoU.
    Returns TP, FP, FN, Accuracy, Precision, Recall, F1-Score.
    """
    tp = 0
    fp = 0
    fn = 0

    # Sets to keep track of matched ground truth and used predicted wakes
    matched_gt_indices = set()

    # Iterate through predicted wakes to find the best ground truth match for each
    for pred_idx, pred_wake in enumerate(predicted_wakes):
        best_iou_for_pred = 0.0
        potential_gt_idx_for_pred = -1

        for gt_idx, gt_wake in enumerate(ground_truth_wakes):
            if gt_idx in matched_gt_indices: # If this GT wake is already matched, skip it
                continue

            iou = calculate_iou(gt_wake, pred_wake)
            if iou > best_iou_for_pred:
                best_iou_for_pred = iou
                potential_gt_idx_for_pred = gt_idx
        
        # If the best IoU for this predicted wake is above threshold AND it matched an unmatched GT wake
        if best_iou_for_pred >= iou_threshold and potential_gt_idx_for_pred != -1:
            tp += 1
            matched_gt_indices.add(potential_gt_idx_for_pred) # Mark GT wake as matched
        else:
            fp += 1 # This predicted wake is a False Positive

    # Calculate False Negatives: any ground truth wake that was not matched
    fn = len(ground_truth_wakes) - len(matched_gt_indices)

    total_ground_truth = len(ground_truth_wakes)
    
    # Calculate performance metrics
    accuracy = tp / total_ground_truth if total_ground_truth > 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    
    # F1-Score is the harmonic mean of precision and recall
    if (precision + recall) == 0:
        f1_score = 0.0
    else:
        f1_score = 2 * (precision * recall) / (precision + recall)
    
    return tp, fp, fn, accuracy, precision, recall, f1_score

In [24]:
def plot_wake_detection(df, file_name, save_dir=None, recon_col: str | None = "z_m_recon"):
    """
    Plots the time series with GT (red spans) and predicted (green spans).
    If recon_col is present in df, overlays the reconstructed signal as a line.
    """
    plt.figure(figsize=(15, 6))

    # Original signal
    plt.plot(df['t_s'], df['z_m'], color='black', linewidth=0.8, label='z_m (Vertical Displacement)')

    # --- Ground truth spans (red) ---
    in_gt_wake = False
    gt_wake_start = None
    gt_label_added = False
    for _, row in df.iterrows():
        if row['wake_label'] == 1 and not in_gt_wake:
            gt_wake_start = row['t_s']; in_gt_wake = True
        elif row['wake_label'] == 0 and in_gt_wake:
            plt.axvspan(gt_wake_start, row['t_s'], color='red', alpha=0.3,
                        label='Ground Truth Wake' if not gt_label_added else "")
            gt_label_added = True; in_gt_wake = False
    if in_gt_wake:
        plt.axvspan(gt_wake_start, df['t_s'].iloc[-1], color='red', alpha=0.3,
                    label='Ground Truth Wake' if not gt_label_added else "")

    # --- Predicted spans (green) ---
    in_pred_wake = False
    pred_wake_start = None
    pred_label_added = False
    if 'predicted_wake_label' in df.columns:
        for _, row in df.iterrows():
            if row['predicted_wake_label'] == 1 and not in_pred_wake:
                pred_wake_start = row['t_s']; in_pred_wake = True
            elif row['predicted_wake_label'] == 0 and in_pred_wake:
                plt.axvspan(pred_wake_start, row['t_s'], color='green', alpha=0.3,
                            label='Predicted Wake' if not pred_label_added else "")
                pred_label_added = True; in_pred_wake = False
        if in_pred_wake:
            plt.axvspan(pred_wake_start, df['t_s'].iloc[-1], color='green', alpha=0.3,
                        label='Predicted Wake' if not pred_label_added else "")

    plt.title(f'Wake Detection for {file_name}', fontsize=16)
    plt.xlabel('Time (s)'); plt.ylabel('z_m (m)')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend()
    plt.tight_layout()

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        plot_path = os.path.join(save_dir, f'{os.path.basename(file_name).replace(".csv", "")}_wake_plot.png')
        plt.savefig(plot_path, dpi=300)
        plt.close()
    else:
        plt.show()


In [25]:
def create_sequences_pytorch(df_data, window_size, stride, scaler=None, fit_scaler=True):
    """
    Creates overlapping sequences (windows) from time series data ('z_m' column)
    and returns them as NumPy arrays along with original start indices.
    Manages StandardScaler fitting/transforming.
    """
    data = df_data['z_m'].values.reshape(-1, 1) # Reshape for StandardScaler

    if fit_scaler:
        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(data)
    else:
        if scaler is None:
            raise ValueError("Scaler must be provided if fit_scaler is False.")
        scaled_data = scaler.transform(data)

    sequences = []
    original_indices = [] # Stores the starting index in the original DataFrame for each sequence
    """
    This list stores the starting index in the original scaled_data array for each created window.
    This is very important later when we need to map the model's predictions (which are per-window)
    back to the original time series data points.
    """
    
    # Loop to create sliding windows
    for i in range(0, len(scaled_data) - window_size + 1, stride):
        sequences.append(scaled_data[i : i + window_size])
        original_indices.append(i) # Record the start index of this window
    
    return np.array(sequences), np.array(original_indices), scaler

In [26]:
def get_wake_labels_for_windows(df, starts, window_size):
    y = []
    w = df["wake_label"].values
    for s in starts:
        y.append(int(w[s:s+window_size].max() == 1))
    return np.array(y, dtype=np.int64)

In [27]:
def build_window_dataset(split_files, window_size, stride, scaler, fit_scaler=False):
    X, y, trips = [], [], []
    for fp in split_files:
        df = load_and_preprocess_data(fp)
        if df is None or len(df)<window_size: 
            continue
        seqs, starts, scaler = create_sequences_pytorch(df, window_size, stride, scaler, fit_scaler)
        labels = get_wake_labels_for_windows(df, starts, window_size)
        X.append(seqs[:,:,0])    # (N,L)
        y.append(labels)
        for s in starts:
            trips.append((fp, int(s), int(s+window_size)))
    if not X:
        return None, None, None, scaler
    X = np.concatenate(X, axis=0)[:,None,:]   # (N,1,L)
    y = np.concatenate(y, axis=0).astype(np.float32)
    return X, y, trips, scaler

In [28]:
class CNN1DWake(nn.Module):
    def __init__(self, in_ch=1, hid=64, blocks=3, k=7, pdrop=0.2):
        super().__init__()
        layers, ch = [], in_ch
        for _ in range(blocks):
            layers += [
                nn.Conv1d(ch, hid, kernel_size=k, padding=k//2),
                nn.BatchNorm1d(hid),
                nn.ReLU(),
                nn.Dropout(pdrop),
            ]
            ch = hid
        self.backbone = nn.Sequential(*layers)
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Linear(hid, 1)
        )
    def forward(self, x):             # x: (B,1,L)
        h = self.backbone(x)
        logit = self.head(h).squeeze(1)  # (B,)
        return logit


In [29]:
# NEW model: LSTM classifier
class LSTMWake(nn.Module):
    def __init__(self, input_size=1, hidden=64, num_layers=2, bidir=True, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=bidir,
            dropout=dropout if num_layers > 1 else 0.0
        )
        out_dim = hidden * (2 if bidir else 1)
        self.head = nn.Linear(out_dim, 1)

    def forward(self, x):        # x: (B, 1, L)
        x = x.transpose(1, 2)    # (B, L, 1)  <-- **key change vs CNN**
        h, _ = self.lstm(x)      # (B, L, H*D)
        h_mean = h.mean(dim=1)   # mean-pool over time
        logit = self.head(h_mean).squeeze(1)  # (B,)
        return logit

In [30]:
# ==== post-processing: windows -> per-sample mask -> intervals ====
def probs_to_intervals_for_file(df, idx_triplets, probs, thr, file_path):
    # paint positives to per-sample mask
    n = len(df)
    mask = np.zeros(n, dtype=np.int8)
    for (fp, s, e), p in zip(idx_triplets, probs):
        if fp != file_path: continue
        if p >= thr:
            e = min(e, n)
            mask[s:e] = 1

    # mask -> raw intervals (in seconds)
    t_s = df["t_s"].to_numpy()
    z = mask.astype(np.int8)
    dz = np.diff(np.pad(z, (1,1)))
    starts = np.where(dz == 1)[0]
    ends   = np.where(dz == -1)[0]
    raw = [(float(t_s[s]), float(t_s[e-1])) for s, e in zip(starts, ends)]

    # merge & filter
    if not raw: 
        return [], mask
    raw.sort()
    out = []
    cs, ce = raw[0]
    for s2, e2 in raw[1:]:
        if (s2 - ce) <= MERGE_GAP_SECONDS:
            ce = max(ce, e2)
        else:
            if (ce - cs) >= MIN_WAKE_DURATION_SECONDS:
                out.append((cs, ce))
            cs, ce = s2, e2
    if (ce - cs) >= MIN_WAKE_DURATION_SECONDS:
        out.append((cs, ce))
    return out, mask

In [31]:
# ________________________ main _________________________

In [32]:
# ===== Scan dataset splits (DEFINE BEFORE USING!) =====
dataset_splits = {'train': [], 'valid': [], 'test': []}
for split in dataset_splits.keys():
    split_path = os.path.join(base_data_dir, split)
    if os.path.exists(split_path):
        dataset_splits[split] = glob.glob(os.path.join(split_path, "*.csv"))
    else:
        print(f"Warning: '{split_path}' not found.")

In [33]:
# ==== build datasets (fit scaler on train only) ====
data_scaler = None
X_tr, y_tr, idx_tr, data_scaler = build_window_dataset(
    dataset_splits["train"], WINDOW_SIZE, STRIDE, scaler=data_scaler, fit_scaler=True
)
X_va, y_va, idx_va, _ = build_window_dataset(
    dataset_splits["valid"], WINDOW_SIZE, STRIDE, scaler=data_scaler, fit_scaler=False
)
X_te, y_te, idx_te, _ = build_window_dataset(
    dataset_splits["test"], WINDOW_SIZE, STRIDE, scaler=data_scaler, fit_scaler=False
)

if X_tr is None: raise RuntimeError("No training windows found.")

train_loader = DataLoader(TensorDataset(torch.from_numpy(X_tr).float(), torch.from_numpy(y_tr).float()),
                          batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
val_loader   = DataLoader(TensorDataset(torch.from_numpy(X_va).float(), torch.from_numpy(y_va).float()),
                          batch_size=BATCH_SIZE, shuffle=False, drop_last=False) if X_va is not None else None
test_loader  = DataLoader(TensorDataset(torch.from_numpy(X_te).float(),),
                          batch_size=BATCH_SIZE, shuffle=False, drop_last=False) if X_te is not None else None


In [34]:
import glob, os
import pandas as pd
from collections import Counter

def lengths_report(root="processed_ts", pattern="*.csv"):
    files = []
    for split in ("train","valid","test"):
        d = os.path.join(root, split)
        if os.path.isdir(d):
            files += glob.glob(os.path.join(d, pattern))
    lens = []
    for fp in files:
        try:
            n = len(pd.read_csv(fp))
            lens.append(n)
        except Exception as e:
            print("Error reading", fp, "->", e)

    if not lens:
        print("No files found.")
        return

    print(f"Files scanned: {len(lens)}")
    print(f"Min length: {min(lens)}")
    print(f"Max length: {max(lens)}")
    print(f"Mean length: {sum(lens)/len(lens):.1f}")
    # Show top 10 most common lengths
    common = Counter(lens).most_common(10)
    print("Most common lengths (top 10):")
    for L, c in common:
        print(f"  {L}: {c} files")

lengths_report("processed_ts", "*.csv")


Files scanned: 19441
Min length: 309
Max length: 5800
Mean length: 2977.5
Most common lengths (top 10):
  2937: 7933 files
  2799: 2672 files
  2800: 1621 files
  2993: 1253 files
  2994: 690 files
  5799: 674 files
  2795: 598 files
  2794: 469 files
  2796: 445 files
  2992: 414 files


In [None]:
# ==== loss (imbalanced) ====
pos = max(1, int((y_tr == 1).sum()))
neg = max(1, int((y_tr == 0).sum()))
pos_weight = torch.tensor([neg/pos], device=device, dtype=torch.float32)  # >1 if positives are rare
crit = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

In [20]:
# ==== train ====
# model = CNN1DWake().to(device)
model = LSTMWake(input_size=1, hidden=64, num_layers=2, bidir=True, dropout=0.2).to(device)
opt = torch.optim.AdamW(model.parameters(), lr=LR)

def predict_probs(loader):
    model.eval()
    outs = []
    with torch.no_grad():
        for batch in loader:
            xb = batch[0].to(device)
            p = torch.sigmoid(model(xb)).cpu().numpy()
            outs.append(p)
    return np.concatenate(outs) if outs else np.array([])

def window_metrics(y_true, probs, thr=0.5):
    preds = (probs >= thr).astype(np.int32)
    TP = int(((preds==1) & (y_true==1)).sum())
    FP = int(((preds==1) & (y_true==0)).sum())
    FN = int(((preds==0) & (y_true==1)).sum())
    TN = int(((preds==0) & (y_true==0)).sum())
    prec = TP/(TP+FP) if (TP+FP)>0 else 0.0
    rec  = TP/(TP+FN) if (TP+FN)>0 else 0.0
    f1   = 2*prec*rec/(prec+rec) if (prec+rec)>0 else 0.0
    loss = None
    return {"TP":TP,"FP":FP,"FN":FN,"TN":TN,"precision":prec,"recall":rec,"f1":f1}


In [21]:

best_state, best_val, stale = None, float("inf"), 0
for epoch in range(EPOCHS):
    # train
    model.train()
    run, seen = 0.0, 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = crit(logits, yb)
        opt.zero_grad(set_to_none=True); loss.backward(); opt.step()
        run += loss.item() * xb.size(0); seen += xb.size(0)
    train_loss = run / max(1, seen)

    # validate
    if val_loader is not None:
        # val loss
        model.eval()
        vrun, vseen = 0.0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                vrun += crit(model(xb), yb).item() * xb.size(0)
                vseen += xb.size(0)
        val_loss = vrun / max(1, vseen)

        # window-level F1 @ 0.5
        probs_va = predict_probs(val_loader)
        wm = window_metrics(y_va, probs_va, thr=DECISION_THR)
        print(f"Epoch {epoch+1:02d} | train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | val_F1@0.5={wm['f1']:.3f}")

        # early stopping on val_loss
        if val_loss < best_val:
            best_val = val_loss; best_state = copy.deepcopy(model.state_dict()); stale = 0
        else:
            stale += 1
            if stale >= EARLY_STOP_PATIENCE:
                print(f"Early stop @ epoch {epoch+1}. Best val_loss={best_val:.4f}")
                break
    else:
        print(f"Epoch {epoch+1:02d} | train_loss={train_loss:.4f}")


Epoch 01 | train_loss=0.6039 | val_loss=1.2974 | val_F1@0.5=0.399
Epoch 02 | train_loss=0.6195 | val_loss=1.2938 | val_F1@0.5=0.422
Epoch 03 | train_loss=0.5940 | val_loss=1.3920 | val_F1@0.5=0.358
Epoch 04 | train_loss=0.5850 | val_loss=1.2593 | val_F1@0.5=0.433
Epoch 05 | train_loss=0.5409 | val_loss=1.3657 | val_F1@0.5=0.428
Epoch 06 | train_loss=0.5299 | val_loss=1.2839 | val_F1@0.5=0.478
Epoch 07 | train_loss=0.5265 | val_loss=1.2590 | val_F1@0.5=0.458
Epoch 08 | train_loss=0.5207 | val_loss=1.3452 | val_F1@0.5=0.400
Epoch 09 | train_loss=0.5993 | val_loss=1.3908 | val_F1@0.5=0.345
Epoch 10 | train_loss=0.6257 | val_loss=1.2604 | val_F1@0.5=0.381
Epoch 11 | train_loss=0.5917 | val_loss=1.1890 | val_F1@0.5=0.460
Epoch 12 | train_loss=0.5677 | val_loss=1.1702 | val_F1@0.5=0.467
Epoch 13 | train_loss=0.6373 | val_loss=1.2549 | val_F1@0.5=0.418
Epoch 14 | train_loss=0.5902 | val_loss=1.1252 | val_F1@0.5=0.499
Epoch 15 | train_loss=0.5632 | val_loss=1.2808 | val_F1@0.5=0.381
Epoch 16 |

In [22]:
# restore best
if best_state is not None:
    model.load_state_dict(best_state)

In [23]:
# ==== TEST: window-level ====
if test_loader is not None:
    probs_te = predict_probs(test_loader)           # per-window probs, in the same order as X_te/y_te/idx_te
    wm_te = window_metrics(y_te, probs_te, thr=DECISION_THR)
    print(f"[TEST] Window-level @0.5: P={wm_te['precision']:.3f} R={wm_te['recall']:.3f} F1={wm_te['f1']:.3f} "
          f"(TP={wm_te['TP']} FP={wm_te['FP']} FN={wm_te['FN']} TN={wm_te['TN']})")


[TEST] Window-level @0.5: P=0.294 R=0.402 F1=0.340 (TP=274964 FP=659129 FN=408370 TN=2137946)


In [24]:
# ==== TEST: event-level (aggregate windows -> intervals) + plots ====
if X_te is not None:
    # group test windows by file
    by_file = {}
    for trip, p in zip(idx_te, probs_te):
        by_file.setdefault(trip[0], []).append((trip, p))

    agg_tp = agg_fp = agg_fn = 0
    plotted_files = []

    for fp, items in by_file.items():
        df = load_and_preprocess_data(fp)
        gt = get_ground_truth_wakes(df)
        trips = [t for t,_ in items]
        pvals = [p for _,p in items]

        pred_intervals, mask = probs_to_intervals_for_file(
            df, trips, pvals, DECISION_THR, fp
        )
        # add predicted_wake_label to df (for plots)
        df_pred = df.copy()
        df_pred["predicted_wake_label"] = mask

        # event metrics for this file
        tp, fp_, fn, acc, prec, rec, f1 = evaluate_detection(gt, pred_intervals, iou_threshold=0.5)
        agg_tp += tp; agg_fp += fp_; agg_fn += fn

        # save a CSV with predictions if you want
        rel = os.path.relpath(fp, base_data_dir)
        save_csv = os.path.join(output_base_dir_ml, "test", os.path.basename(rel))
        os.makedirs(os.path.dirname(save_csv), exist_ok=True)
        df_pred.to_csv(save_csv, index=False)

        # collect for random plotting later
        plotted_files.append((fp, df_pred))

    # aggregate event-level
    P = agg_tp/(agg_tp+agg_fp) if (agg_tp+agg_fp)>0 else 0.0
    R = agg_tp/(agg_tp+agg_fn) if (agg_tp+agg_fn)>0 else 0.0
    F1 = 2*P*R/(P+R) if (P+R)>0 else 0.0
    print(f"[TEST] Event-level IoU>=0.5: P={P:.3f} R={R:.3f} F1={F1:.3f} (TP={agg_tp} FP={agg_fp} FN={agg_fn})")

    # plot 10 random test files (original in red, predicted in green)
    k = min(10, len(plotted_files))
    for (fp, dfp) in random.sample(plotted_files, k):
        plot_wake_detection(dfp, os.path.basename(fp), save_dir=plots_output_dir_ml)

    print(f"Saved {k} plots to: {plots_output_dir_ml}")
else:
    print("No TEST set found; skipping event-level eval and plots.")

[TEST] Event-level IoU>=0.5: P=0.142 R=0.510 F1=0.223 (TP=1707 FP=10290 FN=1638)
Saved 10 plots to: detected_wakes_CLS/wake_plots/LSTM/w150,s5


In [25]:
########################################
################################
######################
###########
###
#