In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to the project's `lib` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from lib.actor.LeeNet import LeeNetActor
from lib.models.backbone.pure_RMT import PureRMT
from lib.models.backbone.plscore_RMT import PLScoreRMT
from lib.models.LeeNet.score_pureRMT_mlp import ScorePureRMTMLP
from lib.models.head.mlp import MLP
from lib.models.layer.patch_embed import PatchEmbed
from lib.models.layer.score import PLScoreLayerUseConv
from lib.trainer.LeeNet_trainer import LeeNetTrainer
from lib.utils.base_funtion import build_dataloaders, get_optimizer_scheduler
from lib.config.cfg_loader import env_setting
from torch.nn.functional import l1_loss
from torch.nn import BCEWithLogitsLoss
from lib.utils.box_ops import giou_loss
from lib.utils.focal_loss import FocalLoss
import torch


# def build_model(cfg):
#     backbone = PureRMT(cfg=cfg)
#     head = MLP(input_dim=10 * cfg.model.pureRMT.embed_dim[-1], hidden_dim=cfg.model.pureRMT.embed_dim[-1], output_dim=4, num_layers=2)
#     model = ScorePureRMTMLP(backbone, head, cfg)
#     return model

def build_model(cfg):
    """Assemble the model: a PLScoreRMT backbone and an MLP head wrapped in ScorePureRMTMLP.

    The head is sized from the last entry of ``cfg.model.pureRMT.embed_dim``:
    that value is used as the hidden width and, multiplied by 10, as the
    input width. The head has 2 layers and 4 outputs.
    NOTE(review): the factor 10 is hard-coded; its origin is not visible in
    this notebook -- confirm against the backbone's output layout.

    Args:
        cfg: configuration object exposing ``model.pureRMT.embed_dim``; it is
            also forwarded unchanged to the backbone and the wrapper model.

    Returns:
        The ``ScorePureRMTMLP`` instance built from the backbone, head and cfg.
    """
    backbone = PLScoreRMT(cfg=cfg)

    # Width of the final backbone stage, read once and reused for the head.
    last_dim = cfg.model.pureRMT.embed_dim[-1]
    head = MLP(
        input_dim=10 * last_dim,
        hidden_dim=last_dim,
        output_dim=4,
        num_layers=2,
    )

    return ScorePureRMTMLP(backbone, head, cfg)

In [3]:
# cfg = env_setting(cfg_name=None)
# 
# loader_train, loader_val = build_dataloaders(cfg)
# 
# device = torch.device("cuda:2")
# patch_embed = PatchEmbed(patch_size=16, in_chans=3, embed_dim=96, flatten=False).to(device)
# score = PLScoreLayerUseConv(embed_dim=96).to(device)
# 
# for i,data in enumerate(loader_train):
# 
#     data = data.to(device)
# 
#     x = data['search_images'][0].view(-1, *data['search_images'].shape[2:])
#     z = data['template_images'][0].view(-1, *data['template_images'].shape[2:])
# 
#     x_rgb = x[:, :3, :, :]
#     z_rgb = z[:, :3, :, :]
# 
#     # get modal information (B,C,H,W)
#     x_modal = x[:, 3:, :, :]
#     z_modal = z[:, 3:, :, :]
# 
#     # patch embedding      ->(B,C:Embed_dim,P_N,P_N) P_N = patch_nums(H / patch_size, W / patch_size)
#     x_rgb, _ = patch_embed(x_rgb)
#     z_rgb, _ = patch_embed(z_rgb)
# 
#     x_modal, _ = patch_embed(x_modal)
#     z_modal, _ = patch_embed(z_modal)
# 
#     # use score function
#     t = score(z_rgb,z_modal)  # (B,1,P_N,P_N) 
#     s = score(x_rgb,x_modal)
#     break

In [4]:
# Load the configuration; cfg_name=None is passed straight to env_setting.
cfg = env_setting(cfg_name=None)

# Train / validation loaders built from the configuration.
loader_train, loader_val = build_dataloaders(cfg)

net = build_model(cfg)

# Loss callables and their weights share the same keys; the GIoU and L1
# weights come from cfg.train, the focal and cls weights are fixed at 1.0.
focal_loss = FocalLoss()
objective = {
    'giou': giou_loss,
    'l1': l1_loss,
    'focal': focal_loss,
    'cls': BCEWithLogitsLoss(),
}
loss_weight = {
    'giou': cfg.train.GIOU_weight,
    'l1': cfg.train.L1_weight,
    'focal': 1.0,
    'cls': 1.0,
}
actor = LeeNetActor(
    net=net,
    objective=objective,
    loss_weight=loss_weight,
    cfg=cfg,
)

optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg)

# The trainer receives both loaders as a [train, val] list.
trainer = LeeNetTrainer(
    actor=actor,
    loaders=[loader_train, loader_val],
    optimizer=optimizer,
    lr_scheduler=lr_scheduler,
    cfg=cfg,
)

# for name, parms in net.backbone.score.conv2.named_parameters():	
#     print('-->name:', name)
#     print('-->para:', parms)
#     print('-->grad_requirs:',parms.requires_grad)
#     print('-->grad_value:',parms.grad)
#     print("===")


In [5]:
# Start training; cfg.train.epoch is passed to the trainer (presumably the
# epoch count -- confirm against LeeNetTrainer.train).
trainer.train(cfg.train.epoch)

-->name: conv.weight
-->grad_value: tensor([[[[ 3.0541e-05,  3.3667e-05,  3.6793e-05,  ...,  4.7213e-05,
            4.6312e-05,  4.5412e-05],
          [ 3.0734e-05,  3.2438e-05,  3.4142e-05,  ...,  4.0947e-05,
            4.2995e-05,  4.5043e-05],
          [ 3.0926e-05,  3.1209e-05,  3.1491e-05,  ...,  3.4682e-05,
            3.9678e-05,  4.4674e-05],
          ...,
          [ 1.1426e-06,  9.9472e-07,  8.4682e-07,  ...,  2.1359e-06,
            4.5849e-06,  7.0339e-06],
          [ 1.2710e-05,  1.0971e-05,  9.2313e-06,  ...,  1.4116e-05,
            1.6863e-05,  1.9611e-05],
          [ 2.4278e-05,  2.0947e-05,  1.7616e-05,  ...,  2.6095e-05,
            2.9141e-05,  3.2187e-05]],

         [[-3.4003e-05, -2.7349e-05, -2.0696e-05,  ..., -4.5860e-05,
           -4.8610e-05, -5.1360e-05],
          [-2.6728e-05, -2.2370e-05, -1.8012e-05,  ..., -4.1517e-05,
           -4.5007e-05, -4.8497e-05],
          [-1.9452e-05, -1.7390e-05, -1.5328e-05,  ..., -3.7174e-05,
           -4.1404e-05

KeyboardInterrupt: 

In [None]:
# trainer.load_checkpoint()
# optimizer = trainer.optimizer
# 
# optimizer.param_groups[0]['lr'] = 0.0005
# 
# for params in optimizer.param_groups:
#     print(params['lr'])