# VMAEv2-ViTB-16x4.yaml
# Experiment configuration: 16 frames sampled at rate 4 (the "16x4" in the
# file name), "MAE-ViT-B" backbone, AVA v2.2 data.
#
# NOTE(review): the nesting below was reconstructed from the key grouping.
# A flat layout would turn every section header into a null value and would
# duplicate WEIGHT_DECAY and VIDEOS_PER_BATCH in one mapping. Confirm the
# placement of BACKBONE / STM under MODEL, and of AVA / ViT / OUTPUT_DIR at
# top level, against the project's config defaults.

DATA:
  # Placeholder path; point it at the local dataset root before running.
  PATH_TO_DATA_DIR: "/path/to/ava"
  NUM_FRAMES: 16
  SAMPLING_RATE: 4
  INPUT_CHANNEL_NUM: [3]
  DATASETS: ['ava_v2.2']
  TRAIN_MIN_SCALES: [256, 320]
  TEST_MIN_SCALES: [256]

AVA:
  # Relative path; presumably resolved against DATA.PATH_TO_DATA_DIR (TODO confirm).
  ANNOTATION_DIR: "annotations_v2.2/"

MODEL:
  # Placeholder path; point it at the pretrained checkpoint before running.
  WEIGHT: "/path/to/weight.pth"
  BACKBONE:
    CONV_BODY: "MAE-ViT-B"
    PATHWAYS: 1
  STM:
    NUM_QUERIES: 100
    HIDDEN_DIM: 256
    NUM_STAGES: 6
    ACTION_CLASSES: 80
    OBJECT_CLASSES: 1
    NUM_HEADS: 8
    DROPOUT: 0.0
    DIM_FEEDFORWARD: 2048
    NUM_FCS: 2
    ACTIVATION: 'ReLU'
    SPATIAL_POINTS: 32
    TEMPORAL_POINTS: 8
    OUT_MULTIPLIER: 4
    N_GROUPS: 4
    NUM_CLS: 1
    NUM_ACT: 1
    NUM_REG: 1
    # Weighting terms; presumably loss/matching coefficients (TODO confirm).
    OBJECT_WEIGHT: 2.0
    ACTION_WEIGHT: 48.0
    GIOU_WEIGHT: 2.0
    L1_WEIGHT: 2.0
    BACKGROUND_WEIGHT: 0.1
    INTERMEDIATE_SUPERVISION: True
    PERSON_THRESHOLD: 0.6
    MEM_ACTIVE: False

ViT:
  EMBED_DIM: 768
  DEPTH: 12
  USE_CHECKPOINT: True
  LAYER_DECAY: 1.0
  # Written with a decimal point so YAML 1.1 loaders resolve it to a float;
  # a bare `1e-4` is read as a string by such loaders.
  WEIGHT_DECAY: 1.0e-4
  DROP_PATH_RATE: 0.2

SOLVER:
  ITER_PER_EPOCH: 23048
  MAX_EPOCH: 12
  BASE_LR: 0.00001
  # Same float-spelling note as ViT.WEIGHT_DECAY.
  WEIGHT_DECAY: 1.0e-4
  # YAML reads `(5, 8)` as a plain string; presumably the config loader
  # evaluates it into a tuple (TODO confirm). Left exactly as written.
  STEPS: (5, 8)
  WARMUP_FACTOR: 0.1
  WARMUP_EPOCH: 2
  CHECKPOINT_PERIOD: 1
  EVAL_PERIOD: 1
  EVAL_AFTER: 3
  VIDEOS_PER_BATCH: 8
  OPTIMIZING_METHOD: 'adamw'

TEST:
  VIDEOS_PER_BATCH: 16

OUTPUT_DIR: "data/output/vitbv2-train"