import torch.optim as optim
from run_utils.callbacks.base import (
    AccumulateRawOutput,
    PeriodicSaver,
    ProcessAccumulatedRawOutput,
    ScalarMovingAverage,
    ScheduleLr,
    TrackLr,
    VisualizeOutput,
    TriggerEngine,
)
from run_utils.callbacks.logging import LoggingEpochOutput, LoggingGradient
from run_utils.engine import Events
from .targets import gen_targets, prep_sample
from .net_desc import create_model
from .run_desc import proc_valid_step_output, train_step, valid_step, viz_step_output
# TODO: should this module hold the training config only?
# TODO: switch all options to function-name strings?
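# Two training phases are defined below: phase 1 keeps the pretrained
# layers frozen (freeze=True) for 50 epochs, then phase 2 unfreezes the
# whole network (freeze=False) and trains for another 100 epochs,
# auto-loading the phase-1 checkpoint ("pretrained": -1).
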
def get_config(nr_type, mode):
    return {
        # ------------------------------------------------------------------
        # ! All phases share the same set of run engines.
        # Phases are run sequentially from index 0 to N.
        "phase_list": [
            {
                "run_info": {
                    # may need to be more dynamic per network
                    "net": {
                        "desc": lambda: create_model(
                            input_ch=3, nr_types=nr_type,
                            freeze=True, mode=mode
                        ),
                        "optimizer": [
                            optim.Adam,
                            {  # keys must match the optimizer's keyword arguments
                                "lr": 1.0e-4,  # initial learning rate
                                "betas": (0.9, 0.999),
                            },
                        ],
                        # learning-rate scheduler: StepLR decays the LR by the
                        # default gamma=0.1 every 25 epochs
                        "lr_scheduler": lambda x: optim.lr_scheduler.StepLR(x, 25),
                        "extra_info": {
                            "loss": {
                                "np": {"bce": 1, "dice": 1},
                                "hv": {"mse": 1, "msge": 1},
                                "tp": {"bce": 1, "dice": 1},
                            },
                        },
                        # path of a checkpoint to load; -1 to auto-load the
                        # checkpoint from the previous phase; None to train
                        # from scratch
                        "pretrained": "/home/louwei/hover_net/ImageNet-ResNet50-Preact_pytorch.tar",
                        # "pretrained": None,
                    },
                },
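                # "gen" builds training targets for a sample; "viz" prepares
                # a sample for visualization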
"target_info": {"gen": (gen_targets, {}), "viz": (prep_sample, {})},
"batch_size": {"train": 16, "valid": 4,}, # engine name : value
"nr_epochs": 50,
},
            {
                "run_info": {
                    # may need to be more dynamic per network
                    "net": {
                        "desc": lambda: create_model(
                            input_ch=3, nr_types=nr_type,
                            freeze=False, mode=mode
                        ),
                        "optimizer": [
                            optim.Adam,
                            {  # keys must match the optimizer's keyword arguments
                                "lr": 1.0e-4,  # initial learning rate
                                "betas": (0.9, 0.999),
                            },
                        ],
                        # learning-rate scheduler: StepLR decays the LR by the
                        # default gamma=0.1 every 25 epochs
                        "lr_scheduler": lambda x: optim.lr_scheduler.StepLR(x, 25),
                        "extra_info": {
                            "loss": {
                                "np": {"bce": 1, "dice": 1},
                                "hv": {"mse": 1, "msge": 1},
                                "tp": {"bce": 1, "dice": 1},
                            },
                        },
                        # path of a checkpoint to load; -1 to auto-load the
                        # checkpoint from the previous phase; None to train
                        # from scratch
                        "pretrained": -1,
                    },
                },
                "target_info": {"gen": (gen_targets, {}), "viz": (prep_sample, {})},
                "batch_size": {"train": 4, "valid": 4},  # batch size per GPU
                "nr_epochs": 100,
            },
        ],
        # ------------------------------------------------------------------
        # TODO: make dataset plugin selection and processing dynamic as well?
        # All enclosed engines share the same neural networks
        # as the outer engine that calls them.
"run_engine": {
"train": {
# TODO: align here, file path or what? what about CV?
"dataset": "kumar", # whats about compound dataset ?
"nr_procs": 16, # number of threads for dataloader
"run_step": train_step, # TODO: function name or function variable ?
"reset_per_run": False,
# callbacks are run according to the list order of the event
"callbacks": {
Events.STEP_COMPLETED: [
# LoggingGradient(), # TODO: very slow, may be due to back forth of tensor/numpy ?
ScalarMovingAverage(),
],
Events.EPOCH_COMPLETED: [
TrackLr(),
PeriodicSaver(),
VisualizeOutput(viz_step_output),
LoggingEpochOutput(),
TriggerEngine("valid"),
ScheduleLr(),
],
},
},
"valid": {
"dataset": "kumar", # whats about compound dataset ?
"nr_procs": 8, # number of threads for dataloader
"run_step": valid_step,
"reset_per_run": True, # * to stop aggregating output etc. from last run
# callbacks are run according to the list order of the event
"callbacks": {
Events.STEP_COMPLETED: [AccumulateRawOutput(),],
Events.EPOCH_COMPLETED: [
# TODO: is there way to preload these ?
ProcessAccumulatedRawOutput(
lambda a: proc_valid_step_output(a, nr_types=nr_type)
),
LoggingEpochOutput(),
],
},
},
},
}
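

# ----------------------------------------------------------------------
# A minimal sketch of how this config might be consumed; illustration
# only, as the actual driver lives elsewhere in the repository and its
# details may differ:
#
#   config = get_config(nr_type=5, mode="original")  # example arguments
#   for phase in config["phase_list"]:
#       net_info = phase["run_info"]["net"]
#       model = net_info["desc"]()  # build the network for this phase
#       opt_cls, opt_kwargs = net_info["optimizer"]
#       optimizer = opt_cls(model.parameters(), **opt_kwargs)
#       scheduler = net_info["lr_scheduler"](optimizer)
#       # ... then run the "train"/"valid" engines in config["run_engine"]
#       # for phase["nr_epochs"] epochs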