In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
from lib.actor.LeeNet import LeeNetActor
from lib.models.backbone.pure_RMT import PureRMT
from lib.models.LeeNet.score_pureRMT_mlp import ScorePureRMTMLP
from lib.models.head.mlp import MLP
from lib.models.layer.patch_embed import PatchEmbed
from lib.models.layer.score import PLScoreLayerUseConv
from lib.trainer.LeeNet_trainer import LeeNetTrainer
from lib.utils.base_funtion import build_dataloaders, get_optimizer_scheduler
from lib.config.cfg_loader import env_setting
from torch.nn.functional import l1_loss
from torch.nn import BCEWithLogitsLoss
from lib.utils.box_ops import giou_loss
from lib.utils.focal_loss import FocalLoss
import torch


def build_model(cfg):
    """Assemble the ScorePureRMTMLP network from a PureRMT backbone and an MLP head.

    Args:
        cfg: Project configuration object. It is forwarded to ``PureRMT`` and
            ``ScorePureRMTMLP`` and must expose ``cfg.model.pureRMT.embed_dim``
            as an indexable sequence.

    Returns:
        The ``ScorePureRMTMLP`` instance built from the backbone, the head and ``cfg``.
    """
    # The backbone is created first so module construction order stays
    # backbone -> head (this matters for seeded parameter initialisation).
    rmt_backbone = PureRMT(cfg=cfg)

    # Last entry of the configured embedding dims; it sizes the head below.
    final_embed_dim = cfg.model.pureRMT.embed_dim[-1]

    # NOTE(review): the factor 10 on the head input width is not explained here;
    # presumably it matches the feature size produced inside ScorePureRMTMLP — confirm.
    mlp_head = MLP(
        input_dim=10 * final_embed_dim,
        hidden_dim=final_embed_dim,
        output_dim=4,
        num_layers=2,
    )

    return ScorePureRMTMLP(rmt_backbone, mlp_head, cfg)


In [16]:
# Check of the patch-embedding + score layers on a single training batch.
cfg = env_setting(cfg_name=None)

loader_train, loader_val = build_dataloaders(cfg)

# This experiment was originally pinned to GPU 2. Keep that preference, but fall
# back to the first GPU (or the CPU) so the cell also runs on smaller machines
# instead of raising on an invalid device ordinal.
PREFERRED_GPU_INDEX = 2
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > PREFERRED_GPU_INDEX:
    device = torch.device(f"cuda:{PREFERRED_GPU_INDEX}")
else:
    device = torch.device("cuda:0")

# Shared channel width of the patch embedding and the score layer.
EMBED_DIM = 96
patch_embed = PatchEmbed(patch_size=16, in_chans=3, embed_dim=EMBED_DIM, flatten=False).to(device)
score = PLScoreLayerUseConv(embed_dim=EMBED_DIM).to(device)

# Forward pass only: no gradients are needed to inspect the scores, so skip
# building the autograd graph.
with torch.no_grad():
    for data in loader_train:
        # NOTE(review): assumes the batch object supports both `.to(device)` and
        # string-key indexing (dict-like tensor container) — confirm against the loader.
        data = data.to(device)

        # Take the first entry along dim 0 of each image stack. Indexing with [0]
        # already yields shape `shape[1:]`, so the view leaves the shape unchanged.
        x = data['search_images'][0].view(-1, *data['search_images'].shape[2:])
        z = data['template_images'][0].view(-1, *data['template_images'].shape[2:])

        # First three channels are treated as the RGB image.
        x_rgb = x[:, :3, :, :]
        z_rgb = z[:, :3, :, :]

        # Remaining channels carry the auxiliary modality.
        # NOTE(review): `patch_embed` is built with in_chans=3, so this presumably
        # expects exactly three modal channels (6-channel input) — confirm.
        x_modal = x[:, 3:, :, :]
        z_modal = z[:, 3:, :, :]

        # Patch embedding; the second return value is discarded.
        # Original author's note: -> (B, embed_dim, P_N, P_N), P_N = H / patch_size.
        x_rgb, _ = patch_embed(x_rgb)
        z_rgb, _ = patch_embed(z_rgb)

        x_modal, _ = patch_embed(x_modal)
        z_modal, _ = patch_embed(z_modal)

        # Score each RGB/modal feature pair.
        # NOTE(review): the original comment gives the score shape as (B, 1, P_N, P_N),
        # but the recorded output shows one scalar per sample — confirm.
        t = score(z_rgb, z_modal)
        s = score(x_rgb, x_modal)

        # Only the first batch is needed for this check.
        break

tensor([[[[0.6926]]],


        [[[0.4538]]],


        [[[0.4794]]],


        [[[0.4758]]],


        [[[0.4767]]],


        [[[0.4753]]],


        [[[0.6806]]],


        [[[0.4682]]],


        [[[0.5598]]],


        [[[0.4778]]],


        [[[0.4463]]],


        [[[0.6138]]],


        [[[0.4575]]],


        [[[0.4763]]],


        [[[0.4484]]],


        [[[0.5834]]],


        [[[0.4484]]],


        [[[0.4950]]],


        [[[0.5718]]],


        [[[0.4920]]],


        [[[0.5070]]],


        [[[0.4809]]],


        [[[0.4341]]],


        [[[0.5413]]],


        [[[0.6448]]],


        [[[0.6006]]],


        [[[0.5954]]],


        [[[0.4436]]],


        [[[0.4435]]],


        [[[0.4591]]],


        [[[0.4567]]],


        [[[0.5736]]],


        [[[0.4503]]],


        [[[0.4563]]],


        [[[0.5296]]],


        [[[0.4736]]],


        [[[0.5877]]],


        [[[0.5028]]],


        [[[0.4316]]],


        [[[0.4952]]],


        [[[0.4716]]],


        [[[0.467

In [None]:
# cfg = env_setting(cfg_name=None)
# 
# loader_train, loader_val = build_dataloaders(cfg)
# 
# net = build_model(cfg)
# 
# focal_loss = FocalLoss()
# objective = {'giou': giou_loss, 'l1': l1_loss, 'focal': focal_loss, 'cls': BCEWithLogitsLoss()}
# loss_weight = {'giou': cfg.train.GIOU_weight, 'l1': cfg.train.L1_weight, 'focal': 1., 'cls': 1.0}
# actor = LeeNetActor(net=net, objective=objective, loss_weight=loss_weight, cfg=cfg)
# 
# optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg)
# 
# # FIXME: the location loss is not being computed
# 
# trainer = LeeNetTrainer(actor=actor, loaders=[loader_train, loader_val], optimizer=optimizer, lr_scheduler=lr_scheduler, cfg=cfg)

In [6]:
# trainer.train(cfg.train.epoch,load_latest=True)

In [11]:
# trainer.load_checkpoint()
# optimizer = trainer.optimizer
# 
# optimizer.param_groups[0]['lr'] = 0.0005
# 
# for params in optimizer.param_groups:
#     print(params['lr'])

0.0005
