In [1]:
import argparse
import datetime
import json
import random
import time
import multiprocessing
from pathlib import Path

import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

import hotr.data.datasets as datasets
import hotr.util.misc as utils
from hotr.engine.arg_parser import get_args_parser
from hotr.data.datasets import build_dataset, get_coco_api_from_dataset
from hotr.engine.trainer import train_one_epoch
from hotr.engine import hoi_evaluator, hoi_accumulator
from hotr.models import build_model
import wandb

from hotr.util.logger import print_params, print_args



In [2]:
# Command-line style arguments for an eval-only V-COCO run that enables
# `--use_pos_info` with `--pos_scaler_type scaler`.
# One flag (and its value, if any) per line. The text is tokenised on *any*
# whitespace, so it needs no backslash line continuations, and a stray double
# space or newline cannot leak an empty or '\n'-prefixed token into argparse.
command = '''
--HOIDet
--share_enc
--pretrained_dec
--num_hoi_queries 16
--object_threshold 0
--temperature 0.05
--no_aux_loss
--eval
--dataset_file vcoco
--data_path /gemini/code/v-coco
--resume /gemini/code/hotr/checkpoints/vcoco/MMH_vcoco_pos_scaler/vcoco_multi_run/best.pth
--use_pos_info
--pos_scaler_type scaler
'''.split()

In [2]:
# Command-line style arguments for an eval-only V-COCO run *without*
# `--use_pos_info`, pointing at the `vcoco_multi_run_no_use_pos_info` checkpoint.
# NOTE: this cell rebinds `command`; whichever argument cell was executed last
# decides what the parser cell sees.
# The text is tokenised on *any* whitespace, so it needs no backslash line
# continuations (the original ended with a dangling one) and cannot produce
# empty tokens.
command = '''
--HOIDet
--share_enc
--pretrained_dec
--num_hoi_queries 16
--object_threshold 0
--temperature 0.05
--no_aux_loss
--eval
--dataset_file vcoco
--data_path /gemini/code/v-coco
--resume /gemini/code/hotr/checkpoints/vcoco/MMH/vcoco_multi_run_no_use_pos_info/best.pth
'''.split()

In [3]:
# Wrap the project's argument definitions in a notebook-local parser and parse
# the prepared `command` token list rather than the process's own argv.
parser = argparse.ArgumentParser(
    prog='End-to-End Human Object Interaction training and evaluation script',
    parents=[get_args_parser()],
)
args = parser.parse_args(command)

In [4]:
# Let the project helper configure distributed execution on `args`.
utils.init_distributed_mode(args)

if args.frozen_weights is not None:
    print("Freeze weights for detector")

device = torch.device(args.device)

# Reproducibility: seed torch, NumPy and the stdlib RNG with the same value,
# offset by the process rank.
seed = utils.get_rank() + args.seed
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(seed)

Not using distributed mode


In [5]:
# ---- Data setup ----
# The train split is built even for an eval-only run because the class/action
# metadata copied onto `args` below is read from it.
eval_split = 'test' if args.eval else 'val'
dataset_train = build_dataset(image_set='train', args=args)
dataset_val = build_dataset(image_set=eval_split, args=args)
assert dataset_train.num_action() == dataset_val.num_action(), "Number of actions should be the same between splits"

# Dataset-derived settings that are stored on `args` for later cells.
args.num_classes = dataset_train.num_category()
args.num_actions = dataset_train.num_action()
args.action_names = dataset_train.get_actions()
if args.share_enc:
    args.hoi_enc_layers = args.enc_layers
if args.pretrained_dec:
    args.hoi_dec_layers = args.dec_layers
if args.dataset_file == 'vcoco':
    # Save V-COCO dataset statistics.
    # These are the important action-related parameters.
    args.valid_ids = np.nonzero(np.array(dataset_train.get_object_label_idx()))[0]
    args.invalid_ids = np.squeeze(
        np.argwhere(np.array(dataset_train.get_object_label_idx()) == 0), axis=1)
    args.human_actions = dataset_train.get_human_action()
    args.object_actions = dataset_train.get_object_action()
    args.num_human_act = dataset_train.num_human_act()
# elif args.dataset_file == 'hico-det':
#     args.valid_obj_ids = dataset_train.get_valid_obj_ids()
print_args(args)

# Samplers: distributed-aware when running distributed, plain otherwise.
if not args.distributed:
    sampler_train = torch.utils.data.RandomSampler(dataset_train)
    sampler_val = torch.utils.data.SequentialSampler(dataset_val)
else:
    sampler_train = DistributedSampler(dataset_train, shuffle=True)
    sampler_val = DistributedSampler(dataset_val, shuffle=False)

# Training batches drop the final incomplete batch; validation keeps it.
batch_sampler_train = torch.utils.data.BatchSampler(
    sampler_train, batch_size=args.batch_size, drop_last=True)

data_loader_train = DataLoader(
    dataset_train,
    batch_sampler=batch_sampler_train,
    collate_fn=utils.collate_fn,
    num_workers=args.num_workers,
)
data_loader_val = DataLoader(
    dataset_val,
    batch_size=args.batch_size,
    sampler=sampler_val,
    drop_last=False,
    collate_fn=utils.collate_fn,
    num_workers=args.num_workers,
)

loading annotations into memory...
Done (t=1.61s)
creating index...
index created!
loading annotations into memory...
Done (t=1.59s)
creating index...
index created!

[Logger] DETR Arguments:
	lr: 0.0001
	lr_backbone: 1e-05
	lr_drop: 80
	frozen_weights: None
	backbone: resnet50
	dilation: False
	position_embedding: sine
	enc_layers: 6
	dec_layers: 6
	num_queries: 100
	dataset_file: vcoco

[Logger] DETR_HOI Arguments:
	hoi_dec_layers: 6
	hoi_nheads: 8
	hoi_dim_feedforward: 2048
	hoi_idx_loss_coef: 1
	hoi_act_loss_coef: 1
	hoi_eos_coef: 0.1
	object_threshold: 0.0


In [6]:
# ---- Model setup ----
model, criterion, postprocessors = build_model(args)
model.to(device)

# `model_without_ddp` always refers to the bare module, whether or not `model`
# has been wrapped in DistributedDataParallel.
if args.distributed:
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    model_without_ddp = model.module
else:
    model_without_ddp = model
n_parameters = print_params(model)

# Two optimizer groups over the trainable parameters: the first sets no
# learning rate of its own, the second (names containing "backbone") uses
# `args.lr_backbone`.
trainable = [
    (name, param)
    for name, param in model_without_ddp.named_parameters()
    if param.requires_grad
]
param_dicts = [
    {"params": [param for name, param in trainable if "backbone" not in name]},
    {
        "params": [param for name, param in trainable if "backbone" in name],
        "lr": args.lr_backbone,
    },
]


[Logger] Number of params:  51576960


In [7]:
def _load_checkpoint(location):
    """Load a checkpoint onto the CPU.

    `location` is fetched through `torch.hub` (with hash checking) when it
    starts with 'https'; otherwise it is read with `torch.load`.
    """
    if location.startswith('https'):
        return torch.hub.load_state_dict_from_url(
            location, map_location='cpu', check_hash=True)
    return torch.load(location, map_location='cpu')


# AdamW over the prepared parameter groups, with a step LR schedule that uses
# `args.lr_drop` as its step size.
optimizer = torch.optim.AdamW(param_dicts, lr=args.lr, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)

# ---- Weight setup ----
# Frozen detector weights go into the `detr` sub-module only.
if args.frozen_weights is not None:
    checkpoint = _load_checkpoint(args.frozen_weights)
    model_without_ddp.detr.load_state_dict(checkpoint['model'])

# A resume checkpoint restores the whole model.
if args.resume:
    checkpoint = _load_checkpoint(args.resume)
    model_without_ddp.load_state_dict(checkpoint['model'])

In [8]:
# Evaluation-only entry point: runs only when `--eval` was passed.
if args.eval:
    if not args.HOIDet:
        # The detection-only path previously called `evaluate_coco(...)` with
        # `base_ds` and `output_dir`; none of those three names is imported or
        # defined anywhere in this notebook, so it could only die with a
        # NameError. Fail with an explicit, actionable message instead.
        raise NotImplementedError(
            "Detection-only evaluation (without --HOIDet) is not available in this "
            "notebook: `evaluate_coco`, `base_ds` and `output_dir` are not defined here."
        )

    if args.dataset_file == 'vcoco':
        # Run inference over the evaluation loader, then accumulate and report.
        # NOTE(review): the two positional booleans are forwarded unchanged;
        # their meaning is defined by `hoi_accumulator` - confirm there.
        total_res = hoi_evaluator(args, model, criterion, postprocessors, data_loader_val, device)
        sc1, sc2 = hoi_accumulator(args, total_res, True, False)
    elif args.dataset_file == 'hico-det':
        # No criterion is passed on this path; the evaluator's result is read
        # as a mapping of mAP statistics.
        test_stats = hoi_evaluator(args, model, None, postprocessors, data_loader_val, device)
        print(f'| mAP (full)\t\t: {test_stats["mAP"]:.2f}')
        print(f'| mAP (rare)\t\t: {test_stats["mAP rare"]:.2f}')
        print(f'| mAP (non-rare)\t: {test_stats["mAP non-rare"]:.2f}')
    else:
        raise ValueError(f'dataset {args.dataset_file} is not supported.')

Evaluation Inference (V-COCO)  [ 644/2462]  eta: 0:26:48    time: 0.8085  data: 0.0169  max mem: 1476

[31m2023-05-02 16:35:21 [ERROR] [ -4280403,Out of memory ] cuMemAlloc(): fail to allocate 32768 KB memory (out of memory)[0m
[31m2023-05-02 16:35:21 [ERROR] [ -4280406,Allocator error ] Failed to init new suballocator: error code 2[0m


Evaluation Inference (V-COCO)  [ 922/2462]  eta: 0:22:52    time: 0.8910  data: 0.0110  max mem: 1779

[31m2023-05-02 16:39:34 [ERROR] [ -4280403,Out of memory ] cuMemAlloc(): fail to allocate 202752 KB memory (out of memory)[0m


Evaluation Inference (V-COCO)  [2462/2462]  eta: 0:00:00    time: 0.8943  data: 0.0211  max mem: 2973
[stats] Total Time (test) : 0:36:10 (0.8815 s / it)
[stats] HOI Recognition Time (avg) : 46.0185 ms
[stats] Distributed Gathering Time : 0:00:00
[stats] Score Matrix Generation completed!!          

               hold_obj: AP = 46.36 (#pos = 3608)
              sit_instr: AP = 41.35 (#pos = 1916)
             ride_instr: AP = 60.13 (#pos = 556)
               look_obj: AP = 34.53 (#pos = 3347)
              hit_instr: AP = 72.55 (#pos = 349)
                hit_obj: AP = 49.74 (#pos = 349)
                eat_obj: AP = 53.60 (#pos = 521)
              eat_instr: AP = 62.53 (#pos = 521)
             jump_instr: AP = 68.73 (#pos = 635)
              lay_instr: AP = 51.91 (#pos = 387)
    talk_on_phone_instr: AP = 47.59 (#pos = 285)
              carry_obj: AP = 33.16 (#pos = 472)
              throw_obj: AP = 45.72 (#pos = 244)
              catch_obj: AP = 42.73 (#pos = 246)
         

In [8]:
# Evaluation-only entry point (re-run of the evaluation with the currently
# loaded `args`/`model`): runs only when `--eval` was passed.
if args.eval:
    if not args.HOIDet:
        # The detection-only path previously called `evaluate_coco(...)` with
        # `base_ds` and `output_dir`; none of those three names is imported or
        # defined anywhere in this notebook, so it could only die with a
        # NameError. Fail with an explicit, actionable message instead.
        raise NotImplementedError(
            "Detection-only evaluation (without --HOIDet) is not available in this "
            "notebook: `evaluate_coco`, `base_ds` and `output_dir` are not defined here."
        )

    if args.dataset_file == 'vcoco':
        # Run inference over the evaluation loader, then accumulate and report.
        # NOTE(review): the two positional booleans are forwarded unchanged;
        # their meaning is defined by `hoi_accumulator` - confirm there.
        total_res = hoi_evaluator(args, model, criterion, postprocessors, data_loader_val, device)
        sc1, sc2 = hoi_accumulator(args, total_res, True, False)
    elif args.dataset_file == 'hico-det':
        # No criterion is passed on this path; the evaluator's result is read
        # as a mapping of mAP statistics.
        test_stats = hoi_evaluator(args, model, None, postprocessors, data_loader_val, device)
        print(f'| mAP (full)\t\t: {test_stats["mAP"]:.2f}')
        print(f'| mAP (rare)\t\t: {test_stats["mAP rare"]:.2f}')
        print(f'| mAP (non-rare)\t: {test_stats["mAP non-rare"]:.2f}')
    else:
        raise ValueError(f'dataset {args.dataset_file} is not supported.')

Evaluation Inference (V-COCO)  [ 644/2462]  eta: 0:28:02    time: 0.8494  data: 0.0107  max mem: 1478

[31m2023-05-02 17:20:27 [ERROR] [ -4280403,Out of memory ] cuMemAlloc(): fail to allocate 32768 KB memory (out of memory)[0m
[31m2023-05-02 17:20:27 [ERROR] [ -4280406,Allocator error ] Failed to init new suballocator: error code 2[0m


Evaluation Inference (V-COCO)  [ 922/2462]  eta: 0:23:39    time: 0.9698  data: 0.0121  max mem: 1781

[31m2023-05-02 17:24:42 [ERROR] [ -4280403,Out of memory ] cuMemAlloc(): fail to allocate 202752 KB memory (out of memory)[0m


Evaluation Inference (V-COCO)  [2462/2462]  eta: 0:00:00    time: 1.0531  data: 0.0154  max mem: 2975
[stats] Total Time (test) : 0:39:53 (0.9722 s / it)
[stats] HOI Recognition Time (avg) : 120.9336 ms
[stats] Distributed Gathering Time : 0:00:00
[stats] Score Matrix Generation completed!!          

               hold_obj: AP = 51.31 (#pos = 3608)
              sit_instr: AP = 48.93 (#pos = 1916)
             ride_instr: AP = 64.84 (#pos = 556)
               look_obj: AP = 41.70 (#pos = 3347)
              hit_instr: AP = 78.95 (#pos = 349)
                hit_obj: AP = 74.21 (#pos = 349)
                eat_obj: AP = 58.48 (#pos = 521)
              eat_instr: AP = 72.55 (#pos = 521)
             jump_instr: AP = 73.96 (#pos = 635)
              lay_instr: AP = 55.12 (#pos = 387)
    talk_on_phone_instr: AP = 52.75 (#pos = 285)
              carry_obj: AP = 36.90 (#pos = 472)
              throw_obj: AP = 57.03 (#pos = 244)
              catch_obj: AP = 53.21 (#pos = 246)
        