In [1]:
from mmseg.datasets import ADE20KDataset
import mmcv

# ADE20K smoke test: build the dataset with the full training pipeline and
# inspect the shapes of one processed sample.

crop_size = (512, 1024)
# Per-channel statistics forwarded to the 'Normalize' step below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 1024)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

dataset = ADE20KDataset(
    data_root = 'data/ade/ADEChallengeData2016',
    img_dir='images/training',
    ann_dir='annotations/training',
    pipeline=train_pipeline
)

# Each `dataset[i]` access is expected to re-run the whole (random) pipeline
# on that image, so fetch sample 0 once and reuse it instead of indexing the
# dataset separately for every field.
sample = dataset[0]

# Segmentation map of sample 4, kept around for optional visual inspection.
img = dataset[4]['gt_semantic_seg']
# mmcv.imshow(img, 'gray')

# Only the last expression of a cell is displayed, so show both shapes here.
sample['img'].shape, sample['gt_semantic_seg'].shape


KeyboardInterrupt: 

In [None]:
from mmseg.datasets import CityscapesDataset

# Cityscapes validation split restricted to a single 'road' class.
# Only the two loading steps of the pipeline are active; the remaining steps
# are kept below as comments.
# NOTE: this cell uses `mmcv`, which is imported by an earlier cell.
crop_size = (512, 1024)

img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}

train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations'},
    # Disabled steps:
    # dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    # dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    # dict(type='RandomFlip', prob=0.5),
    # dict(type='PhotoMetricDistortion'),
    # dict(type='Normalize', **img_norm_cfg),
    # dict(type='ToMask'),
    # dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    # dict(type='DefaultFormatBundle'),
    # dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]

dataset = CityscapesDataset(
    pipeline=train_pipeline,
    data_root='data/cityscapes/',
    img_dir='leftImg8bit/val',
    ann_dir='gtFine/val',
    classes=['road'],
    palette=[[128, 64, 128]],
)

# The result of this access is discarded; it only exercises the pipeline.
dataset[0]

# Show the image loaded for sample 3.
fourth_sample = dataset[3]
mmcv.imshow(fourth_sample['img'])
# mmcv.imshow(dataset[3]['gt_semantic_seg'])
# dataset.CLASSES[13]

#  dataset[0]['gt_semantic_seg']


In [None]:
import mmcv
import mmcv_custom   # noqa: F401,F403
import mmseg_custom
from mmseg.models import build_segmentor

# Build the segmentor described by the DeiT-adapter UperNet config.
config_file = "./configs/cityscapes/upernet_deit_adapter_base_512_160k_cityscapes.py"
cfg = mmcv.Config.fromfile(config_file)

# Clear the pretrained-weights entry and the training config before building.
cfg.model.pretrained = None
cfg.model.train_cfg = None

test_cfg = cfg.get('test_cfg')
model = build_segmentor(cfg.model, test_cfg=test_cfg)



In [None]:
# List every parameter of `model` (built in an earlier cell) with its shape.
# The `=` specifier prefixes each line with "name=" and pads the name to
# 50 columns.
named_params = model.named_parameters()
for name, p in named_params:
    print(f"{name=:50} {p.shape}")
    

In [1]:
from mmseg_custom.models.backbones.base.sam_vit import SAMViT
from functools import partial
import torch

# Hyper-parameters for the SAMViT encoder built below.
# NOTE(review): presumably chosen to match the checkpoint passed as
# `pretrained` -- confirm against that file.
image_size = 1024
vit_patch_size = 16
# Image size divided by patch size; not referenced again in this cell.
image_embedding_size = image_size // vit_patch_size
prompt_embed_dim = 256

encoder_embed_dim = 768
encoder_depth = 12
encoder_num_heads = 12
encoder_global_attn_indexes = [2, 5, 8, 11]

model = SAMViT(
    img_size=image_size,
    patch_size=vit_patch_size,
    embed_dim=encoder_embed_dim,
    depth=encoder_depth,
    num_heads=encoder_num_heads,
    mlp_ratio=4,
    out_chans=prompt_embed_dim,
    qkv_bias=True,
    norm_layer=partial(torch.nn.LayerNorm, eps=1e-6),
    use_rel_pos=True,
    window_size=14,
    global_attn_indexes=encoder_global_attn_indexes,
    pretrained="./pretrained/sam_vit_b_01ec64.pth",
)



In [4]:
# Dump every parameter of `model` to a text file: the name padded to 50
# columns on one line, followed by the parameter's printed representation.
with open("SAMViT_out.txt", "w") as f:
    f.writelines(
        f"{name:50}\n{p}\n" for name, p in model.named_parameters()
    )

In [14]:
import torch
# Count the elements stored under "image_encoder*" keys in the checkpoint
# file and dump those entries to a text file for inspection.
#
# The counter is deliberately NOT called `sum`: in a notebook that name would
# shadow the builtin `sum()` for every cell executed afterwards.
image_encoder_numel = 0

# NOTE: torch.load unpickles the file, so only load checkpoints from a
# trusted source. map_location="cpu" keeps the tensors on the CPU; counting
# elements does not need a GPU.
a = torch.load("./pretrained/sam_vit_b_01ec64.pth", map_location="cpu")

with open("SAMViT_cp.txt", "w") as f:
    for key, tensor in a.items():
        if key.startswith("image_encoder"):
            f.write(f"{key}\n{tensor}\n")
            image_encoder_numel += tensor.numel()

# Displayed as the cell output.
image_encoder_numel

89670912

In [2]:
# build SAMAdapter by config file
import mmcv
import mmcv_custom   # noqa: F401,F403
import mmseg_custom
from mmseg.models import build_segmentor

config_file = "./configs/cityscapes/upernet_sam_adapter_base_512_160k_cityscapes.py"
cfg = mmcv.Config.fromfile(config_file)

# Clear the training config; unlike the DeiT-adapter cell, the `pretrained`
# entry of the model config is left as the config file defines it.
cfg.model.train_cfg = None

test_cfg = cfg.get('test_cfg')
model = build_segmentor(cfg.model, test_cfg=test_cfg)



In [None]:
# Tally the number of elements over all parameters of `model`, and separately
# over those whose name starts with "backbone", printing a per-parameter line.
total = 0
back = 0
for name, p in model.named_parameters():
    n_elems = p.shape.numel()
    total += n_elems
    if name.startswith("backbone"):
        back += n_elems
    print(f"{name:50} {n_elems}")

# Displayed as the cell output: (all parameters, backbone parameters).
total, back