In [2]:
import os

# Set before the remaining imports run; presumably this silences the
# Hugging Face tokenizers parallelism warning when DataLoader workers are
# used — TODO confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import lightning as L
from lightning.pytorch.loggers import TensorBoardLogger

# Project-local modules (model definition and data pipeline).
from model import ClipHARModel
from data import MotionDataDescription, MotionDataModule
from model import ClipLoss

import torch
from lightning.pytorch.callbacks import (
    ModelCheckpoint, EarlyStopping, DeviceStatsMonitor, Timer
)
# glob and re are used by find_latest_ckpts in the evaluation cell.
import glob
import re
# Select the "high" float32 matmul precision mode (see the PyTorch docs for
# the exact speed/precision trade-off on the current hardware).
torch.set_float32_matmul_precision("high")
# Show whether a CUDA device is visible to PyTorch.
print(torch.cuda.is_available())

  from .autonotebook import tqdm as notebook_tqdm


True


In [3]:
# Training configuration.
EPOCH = 10
SEED = 42

# Seed the Python, NumPy and PyTorch RNGs (and DataLoader workers) so that a
# Restart & Run All reproduces the same run as far as the libraries allow.
L.seed_everything(SEED, workers=True)

# %%
# Dataset description built from the image folder; it also supplies the
# label list handed to the model below.
data_decs = MotionDataDescription.build_from_folder("./Human Action Recognition")

# %%
datamodule = MotionDataModule(data_decs, batch_size=64, val_size=0.2)

# %%
model = ClipHARModel(labels=data_decs.label)
print(model)

# %%
# Logs go to logs/clip_har/version_<n>/; because ModelCheckpoint has no
# explicit dirpath, checkpoints are expected under that run directory too
# (the evaluation cell looks for them in version_<n>/checkpoints).
tb_logger = TensorBoardLogger("logs", name="clip_har")

# Keep the three checkpoints with the lowest validation loss plus last.ckpt.
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    # dirpath="checkpoints",
    filename="model-{epoch:02d}-{val_loss:.2f}",
    save_top_k=3,
    mode="min",
    save_last=True,
)

# NOTE(review): patience (10) equals max_epochs (EPOCH = 10), so this
# callback can never stop the run early. Lower the patience or raise EPOCH
# if early stopping is actually wanted.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    mode="min",
)

# Wall-clock timer for the run; query it after fitting with
# timer_cb.time_elapsed("train").
timer_cb = Timer(interval="epoch")
# Logs accelerator device statistics through the trainer's logger.
stats_cb = DeviceStatsMonitor()

# %%
# timer_cb and stats_cb were previously created but never registered, so
# they had no effect; they are now passed to the Trainer.
trainer = L.Trainer(
    callbacks=[checkpoint_callback, early_stop_callback, timer_cb, stats_cb],
    logger=tb_logger,
    max_epochs=EPOCH,
    log_every_n_steps=20,
    # fast_dev_run=True,  # uncomment for a quick smoke test of the loop
    accelerator="gpu",
    devices=1,
)

# %%
trainer.fit(model, datamodule=datamodule)


ClipHARModel(
  (model): CLIPModel(
    (text_model): CLIPTextTransformer(
      (embeddings): CLIPTextEmbeddings(
        (token_embedding): Embedding(49408, 768)
        (position_embedding): Embedding(77, 768)
      )
      (encoder): CLIPEncoder(
        (layers): ModuleList(
          (0-11): 12 x CLIPEncoderLayer(
            (self_attn): CLIPSdpaAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (mlp): CLIPMLP(
              (activation_fn): QuickGELUActivation()
              (fc1): Linear(in_features=768, out_features=3072, bias=True)
              (fc2): Linear(in_features=3072, out_features=768, bias=True

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type      | Params | Mode 
---------------------------------------------------
0 | model        | CLIPModel | 427 M  | eval 
1 | weight_image | Linear    | 11.5 K | train
2 | clip_loss_fn | ClipLoss  | 0      | train
---------------------------------------------------
1.4 M     Trainable params
426 M     Non-trainable params
427 M     Total params
1,711.823 Total estimated model params size (MB)
2         Modules in train mode
450       Modules in eval mode


Sanity Checking: |                                                                               | 0/? [00:00<?, ?it/s]

C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                                                                       

C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0: 100%|██████████████████████████████████████████████████████████████| 158/158 [03:40<00:00,  0.72it/s, v_num=3]
Validation: |                                                                                    | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                               | 0/40 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                  | 0/40 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|█▍                                                        | 1/40 [00:00<00:03, 10.66it/s][A
Validation DataLoader 0:   5%|██▉                                                       | 2/40 [00:00<00:03, 12.12it/s][A
Validation DataLoader 0:   8%|████▎                                                     | 3/40 [00:00<00:09,  4.07it/s][A
Validation DataLoader 0:  10%|█████▊                                                    | 4/40 [00:02<00:18,  1.93it/s][A
Validation DataLoad

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████████████████████████████████████████████████████████| 158/158 [07:01<00:00,  0.38it/s, v_num=3]


# evaluate

In [4]:
import os
import torch
import lightning as L
from data import MotionDataDescription, MotionDataModule
from pathlib import Path
from model import ClipHARModel


# ───── Dataset ─────────────────────────────────────
# NOTE(review): this rebuilds the data module from scratch. If the 80/20
# split inside MotionDataModule is random and unseeded, the validation set
# used here may overlap the images the checkpoints were trained on —
# confirm that the split is deterministic.
data_decs   = MotionDataDescription.build_from_folder("./Human Action Recognition")
datamodule  = MotionDataModule(data_decs,
                               batch_size=64,
                               val_size=0.2,
                               num_workers=4, pin_memory=True)

# ───── Labels, model and Trainer ────────────────────
# NOTE(review): the training cell passes `data_decs.label` to the model;
# this hard-coded list is assumed to contain the same labels in the same
# order — verify.
labels = [
    "calling", "clapping", "cycling", "dancing", "drinking",
    "eating", "fighting", "hugging", "laughing", "listening_to_music",
    "running", "sitting", "sleeping", "texting", "using_laptop",
]

# NOTE(review): this fresh model is not used by the checkpoint evaluation
# loop in this cell, which loads its own instance per checkpoint.
model = ClipHARModel(labels=labels)

checkpoint_callback = L.pytorch.callbacks.ModelCheckpoint(
    monitor="val_acc",
    mode="max",
    save_top_k=3,
    filename="model-{epoch:02d}-{val_acc:.2f}",
)
early_stop_callback = L.pytorch.callbacks.EarlyStopping(
    monitor="val_acc",
    patience=5,
    mode="max",
)
# NOTE(review): this logger shares logs/clip_har with the training runs, so
# validate() will likely create a new version_<n> directory there that has
# no checkpoints — confirm.
tb_logger = L.pytorch.loggers.TensorBoardLogger("logs", name="clip_har")

trainer = L.Trainer(
    callbacks=[checkpoint_callback, early_stop_callback],
    logger=tb_logger,
    max_epochs=30,
    log_every_n_steps=20,
    accelerator="auto",
    # Previously `1 if torch.cuda.is_available() else None`. `None` is not
    # an accepted `devices` value in Lightning 2.x (the default is "auto"),
    # so the Trainer could not be built on a CPU-only machine. A single
    # device is valid for every accelerator.
    devices=1,
)

def find_latest_ckpts(base_dir):
    """Return the checkpoint files of the newest run that has any.

    Scans ``base_dir`` for ``version_<n>`` directories, orders them by the
    numeric ``<n>`` (so ``version_10`` is newer than ``version_2``) and
    returns the ``*.ckpt`` files from the ``checkpoints`` sub-folder of the
    newest version that actually contains checkpoints. Versions without
    checkpoints are skipped instead of aborting the search.

    Args:
        base_dir: Directory that holds the ``version_<n>`` run folders.

    Returns:
        Sorted list of checkpoint paths (``str``) from a single version.

    Raises:
        FileNotFoundError: If ``base_dir`` has no ``version_*`` directory,
            or none of them contains a ``.ckpt`` file.
    """
    def extract_version_num(path):
        # Look only at the final path component so that a "version_<n>"
        # fragment elsewhere in base_dir cannot be mistaken for the run
        # number. Names that are not exactly "version_<digits>" sort last.
        name = os.path.basename(os.path.normpath(path))
        match = re.fullmatch(r"version_(\d+)", name)
        return int(match.group(1)) if match else -1

    # glob.escape keeps characters such as "[" in base_dir literal.
    version_dirs = [
        candidate
        for candidate in glob.glob(os.path.join(glob.escape(base_dir), "version_*"))
        if os.path.isdir(candidate)
    ]
    if not version_dirs:
        raise FileNotFoundError(f"No version_* folder found in {base_dir}")

    version_dirs.sort(key=extract_version_num, reverse=True)

    # Walk from newest to oldest and stop at the first version that has
    # checkpoints.
    for version_dir in version_dirs:
        pattern = os.path.join(glob.escape(version_dir), "checkpoints", "*.ckpt")
        ckpt_paths = sorted(glob.glob(pattern))
        if ckpt_paths:
            return ckpt_paths

    raise FileNotFoundError(
        f"No .ckpt found under any version_* folder in {base_dir}"
    )

# Locate the checkpoints of the most recent training run. The path is
# relative to the working directory and matches
# TensorBoardLogger("logs", name="clip_har"), replacing a hard-coded
# absolute path that only existed on one machine.
base_log_dir = os.path.join("logs", "clip_har")
ckpt_paths = find_latest_ckpts(base_log_dir)


# Validate every checkpoint and record its accuracy, keyed by file name.
results = {}
for ckpt in ckpt_paths:
    ckpt_path = Path(ckpt)
    assert ckpt_path.exists(), f"找不到檔案：{ckpt_path}"

    print(f"🔄  載入 {ckpt_path.name} ...")
    # NOTE(review): strict=False tolerates missing/unexpected state-dict
    # keys, so a mismatched checkpoint would load without an error.
    test_model = ClipHARModel.load_from_checkpoint(
        ckpt_path, labels=labels, strict=False
    )

    # validate() or test() both work, depending on which step the model
    # implements.
    metrics = trainer.validate(model=test_model, datamodule=datamodule, verbose=False)
    # With test_step(): trainer.test(model=test_model, datamodule=datamodule)

    # Use the first logged metric whose name contains "acc"
    # (e.g. 'val_acc' or 'test_acc'), and fail with a clear message when
    # the model logs none.
    acc_key = next((key for key in metrics[0] if "acc" in key), None)
    if acc_key is None:
        raise KeyError(
            f"No accuracy metric found in validation results: {sorted(metrics[0])}"
        )
    acc = metrics[0][acc_key]
    results[ckpt_path.name] = acc
    print(f"✅  {ckpt_path.name:<40}  {acc_key} = {acc:.4%}\n")

# Report the checkpoint with the highest accuracy.
best = max(results, key=results.get)
print("🏆  最佳模型:", best, f"(Acc={results[best]:.4%})")


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


🔄  載入 last.ckpt ...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Validation DataLoader 0: 100%|█████████████████████████████████████████████████████████| 40/40 [00:52<00:00,  0.76it/s]
✅  last.ckpt                                 val_acc = 98.0159%

🔄  載入 model-epoch=00-val_loss=0.24.ckpt ...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Validation DataLoader 0: 100%|█████████████████████████████████████████████████████████| 40/40 [00:52<00:00,  0.76it/s]
✅  model-epoch=00-val_loss=0.24.ckpt         val_acc = 95.1984%

🔄  載入 model-epoch=01-val_loss=0.23.ckpt ...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Validation DataLoader 0: 100%|█████████████████████████████████████████████████████████| 40/40 [00:52<00:00,  0.76it/s]
✅  model-epoch=01-val_loss=0.23.ckpt         val_acc = 96.8651%

🔄  載入 model-epoch=02-val_loss=0.22.ckpt ...


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Users\USER\anaconda3\envs\Deeplearning\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Validation DataLoader 0: 100%|█████████████████████████████████████████████████████████| 40/40 [00:53<00:00,  0.75it/s]
✅  model-epoch=02-val_loss=0.22.ckpt         val_acc = 96.4683%

🏆  最佳模型: last.ckpt (Acc=98.0159%)
