In [None]:
# ---------- Install necessary packages ----------
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): no versions are pinned, so a fresh environment may resolve to
# different releases than the ones that produced the saved outputs — consider pinning.
# NOTE(review): PIL is imported in the next cell but is not listed here; presumably it
# arrives as a dependency of one of these packages — verify on a clean environment.
%pip install numpy pandas opencv-python scikit-learn matplotlib tqdm
# PyTorch is pulled from the PyTorch wheel index below; the "cu129" suffix presumably
# selects CUDA 12.9 builds — confirm it matches the local driver / GPU.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu129

In [None]:
# CELL 1 — Imports (project-wide)
# =========================

# Core + IO
import os, json, pickle, math, random, gc
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import PIL.Image as Image

# Torch + Torchvision
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights

# Sklearn
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

# Plots
import matplotlib.pyplot as plt


In [3]:
# CELL 2 — Device Config & CUDA Setup
# =========================

# Pick the compute device once and report which one training will use.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(torch.cuda.get_device_name(0))
    print("CUDA is available! Training on GPU...")
else:
    device = torch.device('cpu')
    print("CUDA is not available. Training on CPU...")

# cuDNN autotuning is switched on here; seed_everything() turns it back off
# whenever a seeded run starts.
torch.backends.cudnn.benchmark = True

NVIDIA GeForce RTX 5070 Ti
CUDA is available! Training on GPU...


In [None]:
# CELL 3 — Variable Config / Globals (project-wide)
# =========================

# ----- PATHS -----
# Everything is resolved relative to the notebook's working directory.
ROOT = Path(".")
UNIFIED_CSV = ROOT / "DatasetLabel.csv"
MODELS_UNIFIED_ROOT = ROOT.joinpath("Models", "Unified")

# ----- DATA / MODEL CONFIG -----
IMAGE_SIZE = 224          # side length of the square motion image
SAMPLE_EVERY = 1          # keep every N-th video frame
CLASS_NAMES = ["fall", "light", "fan", "curtain", "screen", "none"]
NUM_CLASSES = len(CLASS_NAMES)
LABEL_TO_IDX = dict(zip(CLASS_NAMES, range(NUM_CLASSES)))
OVERWRITE = True          # regenerate motion images even if they already exist

# --- thresholds (tune if needed) ---
THRESH_PERCENTILE = 92.5   # on non-zero diffs
THRESH_MIN        = 2.0    # absolute min diff
# Lower-case extensions first, then their upper-case twins, in the same order.
_VIDEO_EXTS_LOWER = [".mp4", ".avi", ".mov", ".mkv"]
VIDEO_EXTS = _VIDEO_EXTS_LOWER + [ext.upper() for ext in _VIDEO_EXTS_LOWER]

# ----- MODEL CHOICE -----
MODEL_NAME = "efficientnet_v2_s"

# ----- TRAINING HYPERPARAMS -----
TRAIN_BATCH_SIZE = 32
VAL_BATCH_SIZE   = 64
EPOCHS           = 200
LR               = 1e-3
WEIGHT_DECAY     = 1e-5
DROPOUT          = 0.3
HEAD_HIDDEN      = 128
FREEZE_BACKBONE  = True         # freeze conv backbone for stability on small data
USE_AMP_TRAIN    = True         # mixed precision on GPU

# ----- SPLIT SETTINGS -----
LOSO_USERS = ["hamad", "mohammad", "obaid", "saif"]     # gesture user ids
LOSO_TEST_SIZE_FALLNONE = 0.25                          # 75/25 split for fall/none per LOSO fold
MIXED_SEEDS = 5                                         # number of repeated mixed splits
MIXED_TEST_SIZE = 0.20                                  # 80/20 split for mixed data

# ----- REPRO -----
def seed_everything(seed: int):
    """Seed every RNG the notebook uses and put cuDNN into deterministic mode.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    stores the seed in ``PYTHONHASHSEED``, and disables cuDNN autotuning.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False



In [None]:
# CELL 4 — Video → Motion Image (writes to image_path from UNIFIED_CSV)
# =========================

def ensure_dir(p: Path):
    """Create directory ``p`` (and any missing parents); a no-op if it already exists."""
    p.mkdir(exist_ok=True, parents=True)

def resolve_video_path(rel_path: str) -> Path:
    """Turn a CSV ``video_path`` entry into an existing video file under ROOT.

    Tried in order:
      1. ``ROOT / rel_path`` itself, if it is a file;
      2. if the path has no suffix, the path with each extension in VIDEO_EXTS;
      3. if the path is a directory, the alphabetically first file in it
         matching the first extension in VIDEO_EXTS that has any match.

    Raises:
        FileNotFoundError: if none of the above yields a file.
    """
    base = ROOT / rel_path
    if base.is_file():
        return base

    if not base.suffix:
        for candidate in (base.with_suffix(ext) for ext in VIDEO_EXTS):
            if candidate.is_file():
                return candidate

    if base.is_dir():
        for ext in VIDEO_EXTS:
            matches = sorted(base.glob(f"*{ext}"))
            if matches:
                return matches[0]

    raise FileNotFoundError(f"Video not found for: {rel_path}")

def pad_to_square(img: np.ndarray) -> np.ndarray:
    """Centre ``img`` on a black square canvas whose side is max(height, width).

    When the padding is odd, the extra pixel goes to the bottom / right edge.
    """
    height, width = img.shape[:2]
    side = max(height, width)

    pad_rows = side - height
    pad_cols = side - width
    top, left = pad_rows // 2, pad_cols // 2

    return cv2.copyMakeBorder(
        img,
        top, pad_rows - top,
        left, pad_cols - left,
        cv2.BORDER_CONSTANT,
        value=0,
    )

# ----- Process video to motion image -----
def process_video_to_motion_image(video_path: Path,
                                  sample_every: int,
                                  out_size: int) -> np.ndarray:
    """Collapse a video into a single accumulated-motion image.

    Every ``sample_every``-th frame is converted to float32 grayscale. For each
    pair of consecutive sampled frames the absolute difference is thresholded
    at ``max(percentile(non-zero diffs, THRESH_PERCENTILE), THRESH_MIN)`` and
    the surviving differences are summed per pixel. The sum is scaled to
    0..255, padded to a square, resized to ``out_size`` and replicated to
    three channels.

    Frames are processed as they are decoded, so only the previous sampled
    frame and the accumulator are kept in memory.

    Args:
        video_path: video file to read.
        sample_every: keep every N-th decoded frame (must be >= 1).
        out_size: side length of the returned square image.

    Returns:
        uint8 array of shape ``(out_size, out_size, 3)`` with identical channels.

    Raises:
        ValueError: if ``sample_every`` is smaller than 1.
        RuntimeError: if the video cannot be opened or yields fewer than two
            sampled frames.
    """
    if sample_every < 1:
        raise ValueError(f"sample_every must be >= 1, got {sample_every}")

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Failed to open {video_path}")

    acc = None        # running sum of thresholded frame differences
    prev = None       # previous sampled grayscale frame
    n_sampled = 0
    idx = 0
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if idx % sample_every == 0:
                cur = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).astype(np.float32)
                n_sampled += 1
                if prev is None:
                    acc = np.zeros_like(cur, dtype=np.float32)
                else:
                    diff = np.abs(cur - prev)
                    nz = diff[diff > 0]
                    # Threshold adapts per frame pair but never drops below THRESH_MIN.
                    if nz.size > 0:
                        thr = max(np.percentile(nz, THRESH_PERCENTILE), THRESH_MIN)
                    else:
                        thr = THRESH_MIN
                    mask = diff >= thr
                    acc[mask] += diff[mask]
                prev = cur
            idx += 1
    finally:
        # Release the decoder even if reading or colour conversion raised.
        cap.release()

    if n_sampled < 2:
        raise RuntimeError(f"Not enough sampled frames ({n_sampled}) in {video_path}")

    # Normalise to 0..255; an all-zero accumulator (no motion) stays black.
    m = acc.max()
    if m > 0:
        acc = acc / m
    acc = (acc * 255.0).clip(0, 255).astype(np.uint8)

    acc = pad_to_square(acc)
    acc = cv2.resize(acc, (out_size, out_size), interpolation=cv2.INTER_AREA)

    # stack grayscale to 3 channels for the backbone
    return np.stack([acc, acc, acc], axis=2)

# ---- run over UNIFIED_CSV ----
# For every CSV row, render the motion image of `video_path` and write it to
# `image_path` (both relative to ROOT). A failing row is reported and recorded
# but does not stop the loop, so one broken video cannot abort the whole export.
df_u = pd.read_csv(UNIFIED_CSV)

failed_videos = []   # video_path entries that could not be processed

for row in tqdm(df_u.itertuples(index=False), total=len(df_u)):
    rel_video = row.video_path
    rel_image = row.image_path

    try:
        vid_abs = resolve_video_path(rel_video)
        img_abs = ROOT / rel_image
        ensure_dir(img_abs.parent)

        if OVERWRITE or not img_abs.exists():
            img = process_video_to_motion_image(
                vid_abs, sample_every=SAMPLE_EVERY, out_size=IMAGE_SIZE
            )
            ok = cv2.imwrite(str(img_abs), cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
            if not ok:
                raise RuntimeError("cv2.imwrite failed")

    except Exception as e:
        failed_videos.append(rel_video)
        print(f"Failed processing {rel_video}: {e}")

# Only claim success when nothing failed; otherwise make the failure count
# visible at the end instead of leaving it buried in the scrollback.
if failed_videos:
    print(f"Finished with failures: {len(failed_videos)} of {len(df_u)} videos could not be processed.")
else:
    print("All videos processed.")


100%|██████████| 260/260 [01:13<00:00,  3.55it/s]

All videos processed.





In [None]:
# CELL 5 — Transforms + Dataset Class
# =========================

# Basic normalization and slight training augmentations.
# The mean/std values are the usual ImageNet channel statistics (presumably chosen
# to match the pretrained backbone); the same Normalize step ends both pipelines.
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training: tensor conversion, random erasing, random horizontal flip, then normalize.
train_tfms = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.45),
    transforms.RandomHorizontalFlip(p=0.3),
    _normalize,
])

# Validation / test: deterministic, no augmentation.
val_tfms = transforms.Compose([transforms.ToTensor(), _normalize])

class UnifiedMotionDataset(Dataset):
    """Motion-image dataset backed by the unified CSV.

    The CSV is read once at construction. ``indices`` holds positional row
    numbers into that CSV; item ``i`` is the row at ``indices[i]``. Each item
    is ``(transformed image, integer class index)``.
    """

    def __init__(self, csv_path: Path, root: Path,
                 indices: np.ndarray, tfms):
        self.root = root          # base directory for the CSV's relative image paths
        self.tfms = tfms          # callable applied to the PIL image
        self.indices = indices    # positional rows of the CSV that belong to this split
        self.df = pd.read_csv(csv_path)

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, i):
        record = self.df.iloc[self.indices[i]]
        image_file = self.root / record["image_path"]

        bgr = cv2.imread(str(image_file), cv2.IMREAD_COLOR)
        if bgr is None:
            # cv2.imread signals an unreadable / missing file by returning None.
            raise FileNotFoundError(f"Missing image: {image_file}")

        # OpenCV loads BGR; convert to RGB before handing the image to PIL.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        return self.tfms(Image.fromarray(rgb)), LABEL_TO_IDX[record["label"]]


In [None]:
# CELL 6 — CNN Model, Backbone + Head
# =========================

def make_backbone_and_dim(model_name: str = "efficientnet_v2_s"):
    """Build the pretrained conv backbone and report its output channel count.

    Args:
        model_name: backbone identifier; only ``"efficientnet_v2_s"`` is handled.

    Returns:
        ``(backbone, feat_dim)`` where ``backbone`` is the model's ``features``
        module (no classifier) and ``feat_dim`` is the channel dimension of its
        output, measured with a dummy forward pass at ``IMAGE_SIZE``.
        The backbone is returned in eval mode.

    Raises:
        ValueError: for any other ``model_name``.
    """
    if model_name != "efficientnet_v2_s":
        # Name the identifier exactly as it must be passed, and echo what was given.
        raise ValueError(
            f"Unsupported backbone {model_name!r}; "
            "only 'efficientnet_v2_s' is supported in this cell."
        )

    m = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
    backbone = m.features  # conv feature extractor (no classifier)

    # Infer channel dim with a dummy forward at current IMAGE_SIZE
    backbone.eval()
    with torch.no_grad():
        dummy = torch.zeros(1, 3, IMAGE_SIZE, IMAGE_SIZE)
        feat_dim = backbone(dummy).shape[1]  # channels
    return backbone, feat_dim

# ----- UnifiedNet Model -----
class UnifiedNet(nn.Module):
    """
    Single-head classifier over motion images.

    Pipeline: conv backbone -> global max pool -> Linear -> BatchNorm ->
    ReLU -> Dropout -> Linear (``num_classes`` logits).

    Args:
        num_classes: number of output logits.
        model_name: backbone identifier passed to ``make_backbone_and_dim``.
        freeze_backbone: if True, backbone parameters get ``requires_grad=False``.
        head_hidden: width of the hidden layer in the head.
        dropout: dropout probability applied after the ReLU.

    NOTE(review): freezing only disables gradients; any BatchNorm layers inside
    the backbone presumably still update their running statistics while the
    model is in train mode — confirm this is intended.
    """
    def __init__(self,
                 num_classes: int,
                 model_name: str = "efficientnet_v2_s",
                 freeze_backbone: bool = True,
                 head_hidden: int = 128,
                 dropout: float = 0.3):
        super().__init__()
        self.backbone, feat_dim = make_backbone_and_dim(model_name)
        if freeze_backbone:
            self.backbone.requires_grad_(False)

        # Attribute names and creation order are kept stable: they determine the
        # state-dict keys and the order in which seeded initialisation draws.
        self.pool = nn.AdaptiveMaxPool2d(1)
        self.bn   = nn.BatchNorm1d(head_hidden)
        self.drop = nn.Dropout(dropout)
        self.fc1  = nn.Linear(feat_dim, head_hidden)
        self.relu = nn.ReLU(inplace=True)
        self.fc2  = nn.Linear(head_hidden, num_classes)

    def forward(self, x):
        """Map an image batch to class logits of shape [B, num_classes]."""
        feature_map = self.backbone(x)                  # [B, C, H, W]
        pooled = self.pool(feature_map).flatten(1)      # [B, C]
        hidden = self.drop(self.relu(self.bn(self.fc1(pooled))))
        return self.fc2(hidden)


In [None]:
# CELL 7 — Train/Eval + Saving helpers
# =========================

def save_unified_artifacts(save_dir: Path,
                           state_dict: dict,
                           config: dict,
                           metrics: dict,
                           cm: np.ndarray,
                           class_names: list):
    """Persist one run's outputs into ``save_dir`` (created if missing).

    Files written: ``model_state.pt`` (weights), ``config.json``,
    ``metrics.json``, ``confusion_matrix.csv`` and ``confusion_matrix.png``.
    """
    save_dir.mkdir(parents=True, exist_ok=True)
    torch.save(state_dict, save_dir / "model_state.pt")

    for filename, payload in (("config.json", config), ("metrics.json", metrics)):
        with open(save_dir / filename, "w") as fh:
            json.dump(payload, fh, indent=2)

    np.savetxt(save_dir / "confusion_matrix.csv", cm, fmt="%d", delimiter=",")

    # Confusion-matrix heat map with the raw count written in every cell.
    fig, ax = plt.subplots()
    heat = ax.imshow(cm, interpolation="nearest")
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    plt.colorbar(heat, ax=ax)

    tick_positions = range(len(class_names))
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(class_names, rotation=45, ha="right")
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(class_names)

    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for r, c in np.ndindex(n_rows, n_cols):
        # matplotlib's x axis is the column (predicted), y the row (true).
        ax.text(c, r, int(cm[r, c]), ha="center", va="center", fontsize=8)

    fig.tight_layout()
    fig.savefig(save_dir / "confusion_matrix.png", dpi=160)
    plt.close(fig)

def train_eval_one_split(train_idx, test_idx,
                         seed: int, tag: str):
    """
    Train once on the given train/test row indices and report the best epoch.

    - Seeds all RNGs with ``seed`` first.
    - Builds ``UnifiedNet`` from the notebook globals (backbone frozen when
      ``FREEZE_BACKBONE`` is set) and optimises the trainable parameters with
      CrossEntropy + AdamW for ``EPOCHS`` epochs.
    - After every epoch the test split is scored; the weights with the highest
      macro-F1 are kept (first occurrence wins ties) and used for the final
      metrics. Because the checkpoint is selected on the same split it is
      reported on, the returned metrics are optimistic.

    Args:
        train_idx: positional CSV rows used for training.
        test_idx: positional CSV rows used for per-epoch and final evaluation.
        seed: RNG seed for this run (also stored in the returned config).
        tag: free-form run label stored in the result.

    Returns:
        dict with ``acc`` and ``f1_macro`` (plain floats), ``cm`` (confusion
        matrix over all ``NUM_CLASSES`` labels), ``cfg`` (run configuration),
        ``state`` (CPU copy of the selected weights) and ``tag``.
    """
    seed_everything(seed)

    # Data loaders
    train_ds = UnifiedMotionDataset(UNIFIED_CSV, ROOT, train_idx, train_tfms)
    test_ds  = UnifiedMotionDataset(UNIFIED_CSV, ROOT, test_idx,  val_tfms)
    train_loader = DataLoader(train_ds, batch_size=TRAIN_BATCH_SIZE, shuffle=True,  num_workers=0)
    test_loader  = DataLoader(test_ds,  batch_size=VAL_BATCH_SIZE,   shuffle=False, num_workers=0)

    # Model, loss, optimizer
    model = UnifiedNet(num_classes=NUM_CLASSES,
                       model_name=MODEL_NAME,
                       freeze_backbone=FREEZE_BACKBONE,
                       head_hidden=HEAD_HIDDEN,
                       dropout=DROPOUT).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()),
                                  lr=LR, weight_decay=WEIGHT_DECAY)

    # Mixed precision only on GPU. The scaler guards fp16 gradients against
    # underflow and is a transparent pass-through when AMP is disabled.
    amp_enabled = USE_AMP_TRAIN and device.type == "cuda"
    scaler = torch.amp.GradScaler("cuda", enabled=amp_enabled)

    # Start below any attainable score so the first evaluated epoch always
    # yields a checkpoint, even when macro-F1 is exactly 0.0.
    best_f1 = float("-inf")
    best_state = None

    # Clear cache
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    # Train for EPOCHS, check test set each epoch (small data; OK for quick selection)
    for _ in range(EPOCHS):
        model.train()
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            with torch.amp.autocast("cuda", enabled=amp_enabled):
                logits = model(xb)
                loss = criterion(logits, yb)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

        y_true, y_pred = _predict_split(model, test_loader)
        f1m = f1_score(y_true, y_pred, average="macro")
        if f1m > best_f1:
            best_f1 = f1m
            best_state = _snapshot_state(model)

    # With EPOCHS == 0 no epoch was evaluated; fall back to the current weights.
    if best_state is None:
        best_state = _snapshot_state(model)

    # Final metrics with best weights
    model.load_state_dict(best_state)
    y_true, y_pred = _predict_split(model, test_loader)
    cm = confusion_matrix(y_true, y_pred, labels=list(range(NUM_CLASSES)))
    acc = accuracy_score(y_true, y_pred)
    f1m = f1_score(y_true, y_pred, average="macro")

    cfg = {
        "image_size": IMAGE_SIZE,
        "epochs": EPOCHS,
        "lr": LR,
        "weight_decay": WEIGHT_DECAY,
        "dropout": DROPOUT,
        "head_hidden": HEAD_HIDDEN,
        "freeze_backbone": FREEZE_BACKBONE,
        "backbone": MODEL_NAME,
        "classes": CLASS_NAMES,
        "seed": seed,
        "tag": tag,
    }
    # Plain floats keep the metrics JSON-serialisable regardless of the scalar
    # type the metric functions return.
    return {"acc": float(acc), "f1_macro": float(f1m), "cm": cm,
            "cfg": cfg, "state": best_state, "tag": tag}


def _snapshot_state(model: nn.Module) -> dict:
    """Return an independent CPU copy of the model's state dict."""
    # copy=True matters on CPU runs: a tensor already on the CPU would otherwise
    # be returned as-is, so the "snapshot" would keep changing as training continues.
    return {k: v.detach().to("cpu", copy=True) for k, v in model.state_dict().items()}


def _predict_split(model: nn.Module, loader: DataLoader):
    """Run ``model`` in eval mode over ``loader``; return (y_true, y_pred) as NumPy arrays."""
    model.eval()
    targets, preds = [], []
    with torch.no_grad():
        for xb, yb in loader:
            logits = model(xb.to(device)).cpu()
            preds.append(logits.argmax(1))
            targets.append(yb)
    return torch.cat(targets).numpy(), torch.cat(preds).numpy()


In [9]:
# CELL 8 — Build split indices (LOSO and Mixed)
# =========================

# Read the unified CSV once; everything below works on positional row indices.
df_unified = pd.read_csv(UNIFIED_CSV).reset_index(drop=True)
all_idx = np.arange(len(df_unified))

labels = df_unified["label"].astype(str).values

# user_id is optional: fall back to an empty string per row when the column is absent.
if "user_id" in df_unified:
    usercol = df_unified["user_id"].astype(str).values
else:
    usercol = pd.Series([""] * len(df_unified)).astype(str).values

# Row masks for the two task parts: per-user gestures vs. fall/none clips.
is_fallnone = np.isin(labels, ["fall","none"])
is_gesture  = np.isin(labels, ["light","fan","curtain","screen"])


In [10]:
# CELL 9 — LOSO experiment
# =========================
results_loso = []

# The fall/none pool is the same for every fold; only its train/test split changes.
fn_idx = all_idx[is_fallnone]
fn_labels = labels[is_fallnone]

# One fold per held-out user; the fold's seed is the user's 1-based position (1..4).
for seed, u in enumerate(LOSO_USERS, start=1):
    # Gestures: the held-out user's clips are tested, everyone else's are trained on.
    gest_train_idx = all_idx[(is_gesture) & (usercol != u)]
    gest_test_idx  = all_idx[(is_gesture) & (usercol == u)]

    # Fall/none: stratified split that differs per fold through the seed.
    seed_everything(seed)
    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=LOSO_TEST_SIZE_FALLNONE, random_state=seed
    )
    fn_train_sub, fn_test_sub = next(splitter.split(fn_idx, fn_labels))
    fn_train_idx, fn_test_idx = fn_idx[fn_train_sub], fn_idx[fn_test_sub]

    # Final fold = gesture part + fall/none part.
    train_idx = np.concatenate([gest_train_idx, fn_train_idx])
    test_idx  = np.concatenate([gest_test_idx,  fn_test_idx])

    res = train_eval_one_split(train_idx, test_idx, seed=seed, tag=f"LOSO_user_{u}")
    results_loso.append((u, res))
    print(f"[LOSO {u}] acc={res['acc']:.3f}  f1_macro={res['f1_macro']:.3f}")

    save_dir = MODELS_UNIFIED_ROOT / "LOSO" / f"user_{u}"
    fold_metrics = {"acc": res["acc"], "f1_macro": res["f1_macro"]}
    save_unified_artifacts(save_dir, res["state"], res["cfg"],
                           fold_metrics, res["cm"], CLASS_NAMES)

# Averages over the folds.
loso_accs = [fold_res["acc"] for _, fold_res in results_loso]
loso_f1s  = [fold_res["f1_macro"] for _, fold_res in results_loso]
print("LOSO avg acc:", np.mean(loso_accs).round(3))
print("LOSO avg f1 :", np.mean(loso_f1s).round(3))


[LOSO hamad] acc=0.969  f1_macro=0.970
[LOSO mohammad] acc=0.969  f1_macro=0.972
[LOSO obaid] acc=0.908  f1_macro=0.902
[LOSO saif] acc=0.800  f1_macro=0.792
LOSO avg acc: 0.912
LOSO avg f1 : 0.909


In [11]:
# CELL 10 — Mixed 80/20 experiment
# =========================
results_mixed = []

# Integer labels for stratification
y_for_strat = np.array([LABEL_TO_IDX[name] for name in labels])

# MIXED_SEEDS repeated stratified splits over all rows, seeded 100, 101, ...
for run_no, seed in enumerate(range(100, 100 + MIXED_SEEDS), start=1):
    seed_everything(seed)
    splitter = StratifiedShuffleSplit(
        n_splits=1, test_size=MIXED_TEST_SIZE, random_state=seed
    )
    train_idx, test_idx = next(splitter.split(all_idx, y_for_strat))

    res = train_eval_one_split(train_idx, test_idx, seed=seed, tag=f"Mixed_seed_{seed}")
    results_mixed.append(res)
    print(f"[Mixed {run_no}] acc={res['acc']:.3f}  f1_macro={res['f1_macro']:.3f}")

    save_dir = MODELS_UNIFIED_ROOT / "Mixed" / f"seed_{seed}"
    run_metrics = {"acc": res["acc"], "f1_macro": res["f1_macro"]}
    save_unified_artifacts(save_dir, res["state"], res["cfg"],
                           run_metrics, res["cm"], CLASS_NAMES)

# Averages over the repeated splits.
mixed_accs = [run["acc"] for run in results_mixed]
mixed_f1s  = [run["f1_macro"] for run in results_mixed]
print("Mixed avg acc:", np.mean(mixed_accs).round(3))
print("Mixed avg f1 :", np.mean(mixed_f1s).round(3))


[Mixed 1] acc=0.981  f1_macro=0.979
[Mixed 2] acc=0.981  f1_macro=0.979
[Mixed 3] acc=0.962  f1_macro=0.958
[Mixed 4] acc=0.942  f1_macro=0.944
[Mixed 5] acc=0.923  f1_macro=0.922
Mixed avg acc: 0.958
Mixed avg f1 : 0.956
