In [None]:
# Cell 1: Setup environment, imports, paths

import os
import sys
import json
import numpy as np
import torch
from torch import optim
import wandb
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
# 1) Set your project root folder here (whe
# re utils/, models/, configs/ are)
PROJECT_ROOT = "FACT"
os.chdir(PROJECT_ROOT)
print("Current working directory:", os.getcwd())

if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# 2) Import your modules with fixed absolute import paths (no leading dot)
from utils.dataset import DataLoader, create_dataset
from utils.evaluate import Checkpoint
from home import get_project_base
from configs.utils import cfg2flatdict, setup_cfg
from utils.train_tools import resume_ckpt, compute_null_weight, save_results
from models.loss import MatchCriterion


  from .autonotebook import tqdm as notebook_tqdm


Current working directory: /home/cair/Dharmendra/FACT_actseg/src


In [None]:
# Cell 2: Configuration (replace with your actual config yaml files or dicts)

# List of config yaml files relative to PROJECT_ROOT or absolute paths
CONFIG_FILES = [
    "FACT/configs/gtea3.yaml"
]

# Example override configs as a list (like --set in CLI)
SET_CFGS = [
    "dataset", "gtea",
    "split", "split2",
    "aux.gpu", "0",
    "batch_size", "1",
    "epoch", "400",
    "lr", "0.0001",
    "optimizer", "Adam",
    "aux.debug", False,
    "aux.eval_every", "500",
    "aux.print_every", "100",
    "clip_grad_norm", "10.0"
    # no override for Loss.temporal_affinity_weight here!
]


# Setup config object
cfg = setup_cfg(CONFIG_FILES, SET_CFGS)

# Fix base directory to project root (optional, depends on your get_project_base implementation)
BASE = PROJECT_ROOT


In [3]:
# Cell 3: Initialize wandb (optional, you can disable if you want)

try:
    run = wandb.init(
        project=cfg.aux.wandb_project if hasattr(cfg.aux, 'wandb_project') else "default_project",
        entity=cfg.aux.wandb_user if hasattr(cfg.aux, 'wandb_user') else None,
        dir=cfg.aux.logdir if hasattr(cfg.aux, 'logdir') else "./wandb_logs",
        group=cfg.aux.exp if hasattr(cfg.aux, 'exp') else None,
        resume="allow",
        config=cfg2flatdict(cfg),
        reinit=True,
        save_code=False,
        mode="offline" if cfg.aux.debug else "online",
    )
except Exception as e:
    print("WARNING: Failed to initialize wandb.")
    run = None


[34m[1mwandb[0m: Network error (SSLError), entering retry loop.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
# Cell 4: Setup directories & save config

logdir = os.path.join(BASE, cfg.aux.logdir if hasattr(cfg.aux, 'logdir') else "logs")
ckptdir = os.path.join(logdir, 'ckpts')
savedir = os.path.join(logdir, 'saves')

os.makedirs(logdir, exist_ok=True)
os.makedirs(ckptdir, exist_ok=True)
os.makedirs(savedir, exist_ok=True)

print("Saving logs in:", logdir)

# Save config dict as json for reference
argSaveFile = os.path.join(logdir, 'args.json')
with open(argSaveFile, 'w') as f:
    json.dump(cfg, f, indent=4)


Saving logs in: /home/cair/Dharmendra/FACT_actseg/src/log/gtea/split2/gtea/1


In [5]:
# Cell 5: Load datasets

dataset, test_dataset = create_dataset(cfg)

if not cfg.aux.debug:
    trainloader = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True)
else:
    trainloader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=True)

testloader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=False)

print("Train dataset:", dataset)
print("Test dataset:", test_dataset)


Loading Feature from /home/cair/Dharmendra/data_i3d/gtea/features/
Loading Label from /home/cair/Dharmendra/data_i3d/gtea/groundTruth
Train dataset: < Dataset 21 videos, 2048 feat-size, 11 classes >
Test dataset: < Dataset 7 videos, 2048 feat-size, 11 classes >


In [None]:
# Cell 6: Create model and loss


from models_faformer.fact import FACT
net = FACT(cfg, dataset.input_dimension, dataset.nclasses)

# Compute null class weight if needed
if cfg.Loss.nullw == -1:
    compute_null_weight(cfg, dataset)

net.mcriterion = MatchCriterion(cfg, dataset.nclasses, dataset.bg_class)

# Load checkpoint if exists
global_step, ckpt_file = resume_ckpt(cfg, logdir)
if ckpt_file is not None:
    ckpt = torch.load(ckpt_file, map_location="cpu")
    # Remove positional encodings if they exist in checkpoint (optional)
    if 'frame_pe.pe' in ckpt: del ckpt['frame_pe.pe']
    if 'action_pe.pe' in ckpt: del ckpt['action_pe.pe']
    net.load_state_dict(ckpt, strict=False)

net.cuda()
print(net)


No resume, Train from Scratch
FACT(
  (frame_pe): PositionalEncoding(EMPTY)
  (channel_masking_dropout): Dropout2d(p=0.5, inplace=False)
  (block_list): ModuleList(
    (0): InputBlock(
      f:MSTCN(h:2048->128x10->512, d=2, ng=1, dropout=0.2, in_map=True),
      a:SCADecoder(
      (layers): ModuleList(
        (0): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
        (1): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
        (2): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
        (3): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
        (4): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
        (5): SCALayer( adim:128, fdim:512, head:8, ffdim:512, dropout:(0.2, 0.2), svpos:False, cvpos:False )
      )
      (out_linear): Linear(in_f

In [7]:
# Cell 7: Setup optimizer

if cfg.optimizer.lower() == 'sgd':
    optimizer = optim.SGD(net.parameters(),
                          lr=cfg.lr, momentum=cfg.momentum, weight_decay=cfg.weight_decay)
elif cfg.optimizer.lower() == 'adam':
    optimizer = optim.Adam(net.parameters(),
                           lr=cfg.lr, weight_decay=cfg.weight_decay)
else:
    raise NotImplementedError(f"Optimizer {cfg.optimizer} not implemented.")


In [8]:
# Cell 8: Evaluation function

def evaluate(global_step, net, testloader, run, savedir):
    print("TESTING" + "~"*10)

    ckpt = Checkpoint(global_step+1, bg_class=([] if net.cfg.eval_bg else testloader.dataset.bg_class))
    net.eval()
    with torch.no_grad():
        for batch_idx, (vnames, seq_list, train_label_list, eval_label_list) in enumerate(testloader):

            seq_list = [ s.cuda() for s in seq_list ]
            train_label_list = [ s.cuda() for s in train_label_list ]
            video_saves = net(seq_list, train_label_list)
            save_results(ckpt, vnames, eval_label_list, video_saves)

    net.train()
    ckpt.compute_metrics()

    log_dict = {}
    string = ""
    for k, v in ckpt.metrics.items():
        string += "%s:%.3f, " % (k, v)
        log_dict[f'test-metric/{k}'] = v
    print(string + '\n')

    if run is not None:
        run.log(log_dict, step=global_step+1)

    fname = "%d.gz" % (global_step+1)
    ckpt.save(os.path.join(savedir, fname))

    return ckpt


In [None]:
# Cell 9: Training loop

global_step = global_step if 'global_step' in locals() else 0
start_epoch = global_step // len(trainloader) if len(trainloader) > 0 else 0

ckpt = Checkpoint(-1, bg_class=(dataset.bg_class if cfg.eval_bg else []), eval_edit=False)
best_ckpt, best_metric = None, 0

print(f'Start Training from Epoch {start_epoch}...')

for eidx in range(start_epoch, cfg.epoch):
    for batch_idx, (vnames, seq_list, train_label_list, eval_label_list) in enumerate(trainloader):

        seq_list = [ s.cuda() for s in seq_list ]
        train_label_list = [ s.cuda() for s in train_label_list ]

        optimizer.zero_grad()
        loss, video_saves = net(seq_list, train_label_list, compute_loss=True)
        loss.backward()

        if cfg.clip_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(net.parameters(), cfg.clip_grad_norm)
        optimizer.step()

        save_results(ckpt, vnames, eval_label_list, video_saves)

        # Print progress info
        if (global_step+1) % cfg.aux.print_every == 0:
            ckpt.compute_metrics()
            ckpt.average_losses()

            log_dict = {}
            string = f"Iter{global_step+1}, "
            _L = len(string)

            for k, v in ckpt.loss.items():
                log_dict[f"train-loss/{k}"] = v
                string += f"{k}:{v:.4f}, "
            print(string)

            string = " " * _L
            for k, v in ckpt.metrics.items():
                string += f"{k}:{v:.4f}, "
                log_dict['train-metric/'+k] = v
            print(string)

            if run is not None:
                run.log(log_dict, step=global_step+1)

            ckpt = Checkpoint(-1, bg_class=(dataset.bg_class if cfg.eval_bg else []), eval_edit=False)

            if global_step != 0 and (global_step+1) % cfg.aux.eval_every == 0:
                test_ckpt = evaluate(global_step, net, testloader, run, savedir)
                current_metric = test_ckpt.metrics.get('F1@0.50', 0)

                if current_metric >= best_metric:
                    best_ckpt = test_ckpt
                    best_metric = current_metric

                    # Save best model in .net format
                    best_model_net_path = os.path.join(ckptdir, 'best_model.net')
                    net.save_model(best_model_net_path)

                    # Save best model in .pth format (PyTorch standard)
                    best_model_pth_path = os.path.join(ckptdir, 'best_model.pth')
                    torch.save(net.state_dict(), best_model_pth_path)

                    print(f"✅ Saved Best Model: F1@0.50 = {best_metric:.4f}")
                    print(f"   ➤ .net format at: {best_model_net_path}")
                    print(f"   ➤ .pth format at: {best_model_pth_path}")

                # Optional: Always save current iteration model as well (for backup)
                network_file = os.path.join(ckptdir, f'network.iter-{global_step+1}.net')
                net.save_model(network_file)


        global_step += 1

    # Learning rate decay
    if cfg.lr_decay > 0 and (eidx + 1) % cfg.lr_decay == 0:
        for g in optimizer.param_groups:
            g['lr'] = cfg.lr * 0.1
        print('------------------------------------Updated Learning Rate--------------------------------')


Start Training from Epoch 0...




Iter100, loss:9.7122, 
         AccB:14.5852, Acc:14.5852, F1@0.10:2.8458, F1@0.25:0.7760, F1@0.50:0.7760, 
Iter200, loss:8.2323, 
         AccB:29.2961, Acc:29.2961, F1@0.10:29.9819, F1@0.25:20.6344, F1@0.50:9.6997, 
Iter300, loss:6.9311, 
         AccB:41.8887, Acc:41.8887, F1@0.10:49.3146, F1@0.25:38.7210, F1@0.50:25.0224, 
Iter400, loss:6.2245, 
         AccB:52.1291, Acc:52.1291, F1@0.10:53.8963, F1@0.25:46.7601, F1@0.50:34.7413, 
Iter500, loss:5.7071, 
         AccB:61.4901, Acc:61.4901, F1@0.10:65.0576, F1@0.25:61.1255, F1@0.50:45.7547, 
TESTING~~~~~~~~~~
Edit:58.164, AccB:56.523, Acc:65.685, F1@0.10:68.421, F1@0.25:62.280, F1@0.50:40.350, 

✅ Saved Best Model: F1@0.50 = 40.3504
   ➤ .net format at: /home/cair/Dharmendra/FACT_actseg/src/log/gtea/split2/gtea/1/ckpts/best_model.net
   ➤ .pth format at: /home/cair/Dharmendra/FACT_actseg/src/log/gtea/split2/gtea/1/ckpts/best_model.pth
Iter600, loss:5.2168, 
         AccB:67.0262, Acc:67.0262, F1@0.10:67.1953, F1@0.25:62.9625, F1@0.5

In [10]:
# Cell 10: Final evaluation & finish

if best_ckpt is not None:
    print(f'Best Checkpoint at iteration: {best_ckpt.iteration}')
    best_ckpt.eval_edit = True
    best_ckpt.compute_metrics()
    best_ckpt.save(os.path.join(logdir, 'best_ckpt.gz'))

if run is not None:
    run.finish()

# Mark experiment complete
finish_proof_fname = os.path.join(logdir, "FINISH_PROOF")
open(finish_proof_fname, "w").close()

print("Training complete.")


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Best Checkpoint at iteration: 7500


0,1
test-metric/Acc,▁▃▆▇▇▆▆▇▇█▇██▇██
test-metric/AccB,▁▅▇█▇▇▇▇▇███████
test-metric/Edit,▁▅▅▇▇█▇▇▇▇▇█████
test-metric/F1@0.10,▁▄▄▇▇▆▆▇▇█▇█▇▇█▇
test-metric/F1@0.25,▁▄▆▇▇▇▆▇▇█▇█▇▇█▇
test-metric/F1@0.50,▁▅▅▇▇▇▇▇▇▇██████
train-loss/loss,█▇▆▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train-metric/Acc,▁▃▄▅▆▇▇▇▇▇▇▇████████████████████████████
train-metric/AccB,▁▄▆▆▆▇▇▇▇▇▇▇▇███████████████████████████
train-metric/F1@0.10,▁▅▆▆▇▇▇▇▇▇▇▇██▇█████████████████████████

0,1
test-metric/Acc,84.31529
test-metric/AccB,77.62899
test-metric/Edit,86.46198
test-metric/F1@0.10,87.93724
test-metric/F1@0.25,82.48977
test-metric/F1@0.50,73.92946
train-loss/loss,2.12744
train-metric/Acc,96.22266
train-metric/AccB,96.22266
train-metric/F1@0.10,95.2449


Training complete.
