In [2]:
# Mount Google Drive at /content/drive; later cells read the dataset archive from,
# and point the artifact directory at, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Free up unneeded disk space: purge pip's cache and empty root's Trash directory.
!pip cache purge
!rm -rf /root/.local/share/Trash/*

[0mFiles removed: 0


In [4]:
# Check disk capacity and current usage of the filesystem that holds /content.
!df -h /content

Filesystem      Size  Used Avail Use% Mounted on
overlay         113G   39G   74G  35% /


In [5]:
# HAR-2way-modelのクローンコピー
!git clone https://github.com/JojiOkazaki/HAR-2way-model.git
%cd HAR-2way-model
!ls

Cloning into 'HAR-2way-model'...
remote: Enumerating objects: 237, done.[K
remote: Counting objects: 100% (237/237), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 237 (delta 116), reused 210 (delta 92), pack-reused 0 (from 0)[K
Receiving objects: 100% (237/237), 116.28 KiB | 2.47 MiB/s, done.
Resolving deltas: 100% (116/116), done.
/content/HAR-2way-model
config_base.py	dataset_builder  README.md  training


In [6]:
# .zipの展開をしてデータセットの用意をする
!mkdir -p /content/dataset
!unzip -q "/content/drive/MyDrive/HAR-2way-model/datas/datasets.zip" -d /content/dataset
!df -h /content
!ls -R /content/dataset | head -n 20

Filesystem      Size  Used Avail Use% Mounted on
overlay         113G   98G   15G  87% /
/content/dataset:
datasets

/content/dataset/datasets:
katorilab
ucf101

/content/dataset/datasets/katorilab:
processed_112_32

/content/dataset/datasets/katorilab/processed_112_32:
pt
splits

/content/dataset/datasets/katorilab/processed_112_32/pt:
20250802_094617.pt
20250802_100426.pt
20250802_101041.pt
20250802_101506.pt
20250802_102656.pt


In [7]:
# HAR-2way-model-trainingに必要なライブラリのインストール
!pip install torch==2.5.0 torchvision pyyaml tqdm matplotlib

import torch
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")

Collecting torch==2.5.0
  Downloading torch-2.5.0-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.5.0)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.5.0)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.5.0)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.5.0)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3

In [8]:
%%writefile config_local.py

# config_local.py
# Machine-local settings for this Colab runtime: dataset / artifact locations and the
# dataset split parameters. Importing this module prints a short path report and fails
# fast when the dataset directory is missing.
from pathlib import Path

# Directory the dataset archive is extracted into.
RAW_DATA_ROOT = Path("/content/dataset/").resolve()
DATASETS_ROOT = (RAW_DATA_ROOT / "datasets").resolve()
DATASET_NAME = "katorilab"
DATASET_ROOT = (DATASETS_ROOT / DATASET_NAME).resolve()
# Artifacts are written under the mounted Google Drive.
ARTIFACT_ROOT = Path("/content/drive/MyDrive/HAR-2way-model/artifact").resolve()

# Train / validation / test fractions; they must sum to 1.0.
# The previous values (0.58 / 0.14 / 0.29) summed to 1.01.
# NOTE(review): TEST_RATIO was lowered from 0.29 to 0.28 to restore the sum — confirm
# whether 0.57 / 0.14 / 0.29 was the intended split instead.
# NOTE(review): presumably consumed by the dataset_builder package — verify against caller.
TRAIN_RATIO = 0.58
VAL_RATIO = 0.14
TEST_RATIO = 0.28
SPLIT_SEED = 42
ENABLE_GROUP_BALANCE = True
GROUP_BALANCE_ALPHA = 5.0

# Guard against the ratios drifting apart again (tolerance absorbs float rounding).
assert abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) < 1e-9, (
    f"TRAIN_RATIO + VAL_RATIO + TEST_RATIO must equal 1.0, "
    f"got {TRAIN_RATIO + VAL_RATIO + TEST_RATIO}"
)

# Check that the dataset path exists.
print("Checking paths...")
print(f"DATASET_ROOT exists: {DATASET_ROOT.exists()} ({DATASET_ROOT})")
# e.g. confirm that the contents of the dataset directory are visible.
if DATASET_ROOT.exists():
    print(f"Contents of {DATASET_NAME}: {[p.name for p in DATASET_ROOT.iterdir()]}")

assert DATASET_ROOT.exists(), f"Error: {DATASET_ROOT} が見つかりません。解凍パスを確認してください。"

Writing config_local.py


In [15]:
%%writefile training/configs/train.yaml

# train.yaml
# Training configuration; passed to `python -m training.main --config training/configs/train.yaml`.
logging:
  log_dir: runs
  best_model_name: best_model.pt
  csv_name: loss_history.csv
  graph_dir: graphs
  graph_size: [6, 4]

# NOTE(review): config_local.py (written earlier in this notebook) sets
# DATASET_NAME = "katorilab", while this config selects ucf101 — confirm the mismatch is intentional.
dataset:
  name: ucf101

data:
  processed_dir: processed_112_32
  split: split01
  # img_pt_dir: processed_112_32/pt
  # skel_pt_dir: processed_112_32/pt
  # train_file_list: processed_112_32/splits/default/train_list.txt
  # val_file_list: processed_112_32/splits/default/val_list.txt
  num_workers: 16

weighted_sampler:
  enabled: true
  alpha: 1.0

model:
  architecture:
    img:
      cnn:
        conv_channels: [3,16,32,64,128,256,256]
        fc_layers: [256]
        dropouts: [0.0,0.2,0.3,0.4,0.5,0.5]
        input_size: [112, 112]
      transformer:
        d_model: 256
        nhead: 8
        num_layers: 3
        dim_ff: 256
        max_len: 32
        dropout: 0.7

    skel:
      stgcn:
        channels: [3,64,64,64,128,128,128,256,256,256]
        dropouts: [0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3]
        temporal_kernel_size: 9

    mlp:
      layers: [512, 512, 768]
      dropouts: [0.4]

  loss_weights: [1.0, 1.0, 1.0]

training:
  batch_size: 16
  epochs: 120
  accum_steps: 4
  max_norm: 5.0 # Keeps gradients from becoming too large. Smaller values make training slower but more stable.
  seed: 42
  patience: 5
  min_delta: 0.005
  recall_k: 1
  
  # Train with proto_ce (option (A)).
  loss_mode: proto_ce

  # Batch size for the scan that builds the prototypes (for when it should differ
  # from the train batch, e.g. to save VRAM).
  prototype_batch_size: 64

  # Threshold for judging a person as valid (used on the Trainer side).
  min_valid_t: 16

  # label_ids to treat as unknown (e.g. exclude 999 from the training targets).
  unknown_label_ids: [999]

  # The temperature is still used with proto_ce (it scales the logits).
  temperature: 0.2

optimizer:
  lr: 0.0001 # Training fails at 1e-3. Earlier note: "0.0005 -> 0.001 (2x)". NOTE(review): that note does not match the current value — confirm.
  weight_decay: 0.02
  warmup_epochs: 1 # Slows the start of training to stabilize the early phase. Smaller values give a faster ramp-up.
  min_lr_ratio: 0.1 # Lowers the learning rate late in training to damp oscillation. Smaller values give a smaller final learning rate.

runtime:
  device: cuda


Overwriting training/configs/train.yaml


In [10]:
!nvidia-smi
print("\n\n")
!cat /proc/cpuinfo | grep "model name" | head -n 1
print("\n\n")
!free -h
print("\n\n")
!cat /etc/issue
print("\n\n")
import sys
print(f"Python version: {sys.version}")

Tue Feb  3 10:03:52 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   32C    P0             45W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [16]:
# Launch training with the train.yaml written by this notebook.
# Relies on the working directory being the HAR-2way-model repository root (set by the
# %cd in the clone cell) so that the `training` package and the relative config path resolve.
!python -m training.main --config training/configs/train.yaml

Checking paths...
DATASET_ROOT exists: True (/content/dataset/datasets/katorilab)
Contents of katorilab: ['processed_112_32']
Create Data Loader...
elapsed time: 00:00:00
Build class prototypes from train labels...
elapsed time: 00:01:08
Create Model...
elapsed time: 00:00:00
Training...
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
100% 477/477 [02:03<00:00,  3.85it/s]
100% 116/116 [00:24<00:00,  4.76it/s]
[Epoch 1]acc:0.016/0.026 | main:13.739/13.446 | full:4.570/4.433 | img:4.603/4.518 | skel:4.565/4.496
elapsed time: 00:02:29
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
100% 477/477 [01:32<00:00,  5.14it/s]
100% 116/116 [00:17<00:00,  6.57it/s]
[Epoch 2]acc:[32m0.031[0m/[32m0.035[0m | main:[32m13.349[0m/[32m13.095[0m | full:[32m4.394[0m/[32m4.271[0m | img:[32m4.496[0m/[32m4.413[0m | skel:[32m4.458[0m/[32m4.410[0m
elapsed time: 00:01:52
100% 477/47