In [44]:
import torch
import paddle

from collections import OrderedDict

import utils
from models import vit_small, MultiCropWrapper, IBOTHead

# Select GPU index 7 for Paddle. As the last expression in the cell, the call's
# return value is what the notebook renders as the cell output.
paddle.device.set_device("gpu:7")


Place(gpu:7)

In [58]:
# Seed Paddle's RNG before any network is instantiated.
paddle.seed(10)

# Backbones: only the student is built with drop_path_rate=0.1 and
# masked_im_modeling=True; the teacher uses the defaults for both.
student = vit_small(
    patch_size=16,
    drop_path_rate=0.1,
    return_all_tokens=True,
    masked_im_modeling=True,
)
teacher = vit_small(patch_size=16, return_all_tokens=True)
embed_dim = student.embed_dim

# Keyword arguments that both projection heads have in common.
shared_head_kwargs = dict(
    out_dim=8192,
    patch_out_dim=8192,
    norm=None,
    act="gelu",
    shared_head=True,
)

# The student head additionally passes norm_last_layer=False; the teacher head
# leaves that argument at its default.
student_head = IBOTHead(embed_dim, norm_last_layer=False, **shared_head_kwargs)
student = MultiCropWrapper(student, student_head)

teacher_head = IBOTHead(embed_dim, **shared_head_kwargs)
teacher = MultiCropWrapper(teacher, teacher_head)

# Snapshot of the student's state dict; the conversion cells iterate its keys.
paddle_weight = student.state_dict()
student.eval()
teacher.eval()

# NOTE: torch.load unpickles the file, so only point it at a trusted checkpoint.
torch_ckpt = torch.load("/home/xiejunlin/workspace/ibot/pretrained/checkpoint.pth", map_location='cpu')

# Remove every "module." occurrence from the parameter names of both networks.
for branch in ("student", "teacher"):
    torch_ckpt[branch] = {
        name.replace("module.", ""): tensor
        for name, tensor in torch_ckpt[branch].items()
    }


In [59]:
# Convert the PyTorch student weights into NumPy arrays keyed by the Paddle
# parameter names.
student_weight = torch_ckpt["student"]

student_weight_dict = OrderedDict()
for paddle_key in paddle_weight.keys():
    # Only convert names that also exist in the PyTorch checkpoint, so that
    # small custom modules in the Paddle model do not disturb the conversion.
    if paddle_key not in student_weight:
        continue

    array = student_weight[paddle_key].detach().numpy()
    # 2-D weights must be transposed when going from PyTorch to Paddle; every
    # other rank, and any key containing "masked_embed", is copied unchanged.
    if array.ndim == 2 and "masked_embed" not in paddle_key:
        array = array.T
    student_weight_dict[paddle_key] = array

# Store the weight_g tensors with their last axis squeezed; this replaces
# whatever the loop above wrote for the same keys.
for gain_key in ("head.last_layer.weight_g", "head.last_layer2.weight_g"):
    student_weight_dict[gain_key] = student_weight[gain_key].squeeze(-1).detach().cpu().numpy()


In [60]:

# Convert the PyTorch teacher weights into NumPy arrays keyed by the Paddle
# parameter names.
teacher_weight = torch_ckpt["teacher"]

teacher_weight_dict = OrderedDict()
for paddle_key in paddle_weight.keys():
    # Only convert names that also exist in the PyTorch checkpoint, so that
    # small custom modules in the Paddle model do not disturb the conversion.
    if paddle_key not in teacher_weight:
        continue

    array = teacher_weight[paddle_key].detach().numpy()
    # 2-D weights must be transposed when going from PyTorch to Paddle; every
    # other rank, and any key containing "masked_embed", is copied unchanged.
    if array.ndim == 2 and "masked_embed" not in paddle_key:
        array = array.T
    teacher_weight_dict[paddle_key] = array

# Store the weight_g tensors with their last axis squeezed (presumably to match
# the Paddle-side shape -- confirm). These must be read from the TEACHER
# checkpoint: the previous version copied them from `student_weight`, which
# silently put the student's weight_g values into the teacher state.
for gain_key in ("head.last_layer.weight_g", "head.last_layer2.weight_g"):
    teacher_weight_dict[gain_key] = teacher_weight[gain_key].squeeze(-1).detach().cpu().numpy()

In [61]:
# Rebuild the PyTorch optimizer state as plain Python/NumPy data.
optimizer = torch_ckpt['optimizer']

optim_dict = {
    # Per-key state: tensors become NumPy arrays, anything else is kept as-is.
    'state': {
        state_key: {
            field: (value.detach().numpy() if isinstance(value, torch.Tensor) else value)
            for field, value in entries.items()
        }
        for state_key, entries in optimizer['state'].items()
    },
    # Param groups are carried over unchanged (same objects, new list).
    'param_groups': list(optimizer['param_groups']),
}


In [12]:
# Create a Paddle AdamW optimizer over the student's parameter groups.
# NOTE: this rebinds `optimizer`, the same name the optimizer-conversion cell
# uses for the PyTorch optimizer state dict.
params_groups = utils.get_params_groups(student)
optimizer = paddle.optimizer.AdamW(
    parameters=params_groups,
    learning_rate=0.0001,
)

{}

In [62]:

# Convert every entry stored under the checkpoint's 'ibot_loss' key to a NumPy array.
ibot_loss = OrderedDict(
    (name, tensor.detach().numpy())
    for name, tensor in torch_ckpt['ibot_loss'].items()
)

In [50]:
# Full checkpoint: both networks, the loss state and the converted optimizer state.
full_ckpt_with_optimizer = {
    "student": student_weight_dict,
    "teacher": teacher_weight_dict,
    "epoch": 100,
    "ibot_loss": ibot_loss,
    "optimizer": optim_dict,
}
paddle.save(full_ckpt_with_optimizer, "check/ckpt/full_ckpt_v2.pdparams")


In [63]:
# Same payload as the v2 checkpoint but without the optimizer state.
full_ckpt_without_optimizer = {
    "student": student_weight_dict,
    "teacher": teacher_weight_dict,
    "epoch": 100,
    "ibot_loss": ibot_loss,
}
paddle.save(full_ckpt_without_optimizer, "check/ckpt/full_ckpt_weight_gv_key_v3.pdparams")


In [70]:
import numpy as np

# Load the two dumped weight_g arrays (file names suggest one from Paddle, one from PyTorch).
weight_g_pd = np.load("/home/xiejunlin/workspace/IBOT-Paddle/data/weight_norm_g_pd.npy")
weight_g_th = np.load("/home/xiejunlin/workspace/IBOT-Paddle/data/weight_norm_g_th.npy")

# Mean absolute difference between the two arrays; stored in `res` but not displayed.
res = np.mean(np.abs(weight_g_pd - weight_g_th))

# Only the final bare expression is rendered as the cell output.
weight_g_th
weight_g_pd

array([1.0796394 , 1.1381648 , 1.3803074 , ..., 1.3158706 , 0.95200276,
       1.2740731 ], dtype=float32)