Commit
change way of layer freezing
Remove `find_unused_parameters` from DDP and instead wrap the optimizer's `step` function to drop gradients of frozen layers when freezing the backbone. Freezing this way speeds up training.
L1aoXingyu committed May 25, 2021
1 parent dbf1604 commit 2b65882
Showing 2 changed files with 23 additions and 21 deletions.
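
The commit's core trick, reduced to a standalone sketch: instead of setting `requires_grad_(False)` on frozen modules (which forces DDP to hunt for unused parameters every iteration), keep gradients flowing everywhere and simply discard them for the frozen parameter groups right before the optimizer update. The toy model, group names, and single training step below are illustrative assumptions, not fastreid code:

```python
import torch
from torch import nn

# Toy setup: a "backbone" we want frozen for a while and a "head" that keeps training.
model = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))
optimizer = torch.optim.SGD(
    [
        {"params": model[0].parameters(), "name": "backbone"},  # to be frozen
        {"params": model[1].parameters(), "name": "head"},
    ],
    lr=0.1,
)

frozen = {"backbone"}
origin_step = optimizer.step  # keep the original bound method so it can be restored


@torch.no_grad()
def step(closure=None):
    # Drop gradients of frozen groups so the update leaves those parameters untouched.
    for group in optimizer.param_groups:
        if group.get("name") in frozen:
            for p in group["params"]:
                p.grad = None
    return origin_step(closure)


optimizer.step = step  # freeze: updates now skip the backbone

# One illustrative iteration: the backbone stays put while the head still learns.
backbone_before = model[0].weight.clone()
loss = model(torch.randn(4, 8)).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()
assert torch.equal(model[0].weight, backbone_before)

optimizer.step = origin_step  # unfreeze: restore the normal update
```

Restoring `origin_step` later undoes the freeze without ever touching `requires_grad`, which is what lets the DDP wrapper below drop `find_unused_parameters`.
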
fastreid/engine/defaults.py (3 changes: 1 addition & 2 deletions)
@@ -213,7 +213,6 @@ def __init__(self, cfg):
        # for part of the parameters is not updated.
        model = DistributedDataParallel(
            model, device_ids=[comm.get_local_rank()], broadcast_buffers=False,
-           find_unused_parameters=True
        )

        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
@@ -305,9 +304,9 @@ def build_hooks(self):

        ret.append(hooks.LayerFreeze(
            self.model,
+           self.optimizer,
            cfg.MODEL.FREEZE_LAYERS,
            cfg.SOLVER.FREEZE_ITERS,
-           cfg.SOLVER.FREEZE_FC_ITERS,
        ))

        # Do PreciseBN before checkpointer, because it updates the model and need to
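
On the DDP side: because no parameter has its `requires_grad` flag flipped after wrapping anymore, every registered parameter produces a gradient on each backward pass, so the default reducer suffices and the extra autograd-graph traversal that `find_unused_parameters=True` performs each iteration is avoided. A hedged single-process sketch of the simplified wrapper call (the gloo process group and CPU model are stand-ins for fastreid's multi-GPU setup, not its actual launch code):

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel

# Minimal single-process group, just so DDP can be constructed (illustrative only).
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = torch.nn.Linear(16, 4)
# No find_unused_parameters: all parameters keep requires_grad=True, and the
# optimizer-level freezing never hides any of them from the reducer.
model = DistributedDataParallel(model, broadcast_buffers=False)

loss = model(torch.randn(2, 16)).sum()
loss.backward()  # the default reducer sees a gradient for every parameter

dist.destroy_process_group()
```
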
fastreid/engine/hooks.py (41 changes: 22 additions & 19 deletions)
@@ -449,19 +449,18 @@ def data_loader():


class LayerFreeze(HookBase):
-   def __init__(self, model, freeze_layers, freeze_iters, fc_freeze_iters):
+   def __init__(self, model, optimizer, freeze_layers, freeze_iters):
        self._logger = logging.getLogger(__name__)

        if isinstance(model, DistributedDataParallel):
            model = model.module
        self.model = model
+       self.optimizer = optimizer

        self.freeze_layers = freeze_layers
        self.freeze_iters = freeze_iters
-       self.fc_freeze_iters = fc_freeze_iters

        self.is_frozen = False
-       self.fc_frozen = False

    def before_step(self):
        # Freeze specific layers
@@ -472,18 +471,6 @@ def before_step(self):
        if self.trainer.iter >= self.freeze_iters and self.is_frozen:
            self.open_all_layer()

-       if self.trainer.max_iter - self.trainer.iter <= self.fc_freeze_iters \
-               and not self.fc_frozen:
-           self.freeze_classifier()
-
-   def freeze_classifier(self):
-       for p in self.model.heads.classifier.parameters():
-           p.requires_grad_(False)
-
-       self.fc_frozen = True
-       self._logger.info("Freeze classifier training for "
-                         "last {} iterations".format(self.fc_freeze_iters))
-
    def freeze_specific_layer(self):
        for layer in self.freeze_layers:
            if not hasattr(self.model, layer):
@@ -493,8 +480,24 @@ def freeze_specific_layer(self):
            if name in self.freeze_layers:
                # Change BN in freeze layers to eval mode
                module.eval()
-               for p in module.parameters():
-                   p.requires_grad_(False)

+       def zero_freeze_grad():
+           for group in self.optimizer.param_groups:
+               if group["name"].split('.')[0] in self.freeze_layers:
+                   for p in group["params"]:
+                       if p.grad is not None:
+                           p.grad = None
+
+       origin_step = self.optimizer.step
+       self.origin_step = origin_step
+
+       @torch.no_grad()
+       def step(closure=None):
+           zero_freeze_grad()
+           loss = origin_step(closure)
+           return loss
+
+       self.optimizer.step = step
+
        self.is_frozen = True
        freeze_layers = ", ".join(self.freeze_layers)
@@ -504,8 +507,8 @@ def open_all_layer(self):
        for name, module in self.model.named_children():
            if name in self.freeze_layers:
                module.train()
-               for p in module.parameters():
-                   p.requires_grad_(True)

+       self.optimizer.step = self.origin_step
+
        self.is_frozen = False

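One assumption worth flagging: `zero_freeze_grad` keys off `group["name"].split('.')[0]`, so it only works when the optimizer's param groups carry a `"name"` entry whose first dotted component is the top-level module name (fastreid's optimizer builder is expected to provide this; otherwise the hook would raise a `KeyError`). A small sketch of that matching logic in isolation, with a hypothetical module layout and group construction:

```python
import torch
from torch import nn

# Hypothetical layout: "backbone" appears in freeze_layers, "heads" does not.
model = nn.ModuleDict({"backbone": nn.Linear(8, 8), "heads": nn.Linear(8, 2)})
freeze_layers = ["backbone"]

# One group per parameter, each tagged with its dotted name. This mirrors the
# structure the hook relies on; the exact builder in fastreid may differ.
param_groups = [{"params": [p], "name": name} for name, p in model.named_parameters()]
optimizer = torch.optim.SGD(param_groups, lr=0.1)

# group["name"].split('.')[0] maps "backbone.weight" -> "backbone", so a group
# is treated as frozen exactly when its top-level module is in freeze_layers.
for group in optimizer.param_groups:
    frozen = group["name"].split(".")[0] in freeze_layers
    print(f'{group["name"]:>16s}  ->  {"frozen" if frozen else "trained"}')
```
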
