In [1]:
import torch
import torchvision

from torchvision import models

import datasets.deep_fashion_ctsrbm

import os
import pathlib

import arch.backbones_trf

---

In [2]:
# Swin V2 Tiny built without requesting pretrained weights (``weights=None`` is the default).
model = models.swin_v2_t(weights=None)

In [3]:
# Dump the printed module tree of ``model`` to a text file for offline reading.
arch_path = pathlib.Path("misc/swin_v2_t_model_arch.txt")
# Create the target directory first so a fresh checkout without ``misc/`` does
# not fail with FileNotFoundError.
arch_path.parent.mkdir(parents=True, exist_ok=True)
with open(arch_path, "w") as out_file:
    print(model, file=out_file)

---

In [4]:
# Fresh Swin V2 Tiny instance for this section (no pretrained weights requested).
model = models.swin_v2_t(weights=None)

In [5]:
# Preprocessing preset attached to the default Swin V2 Tiny weights.
ctsrbm_image_transform = models.Swin_V2_T_Weights.DEFAULT.transforms()
ctsrbm_image_transform.antialias = True

# Dataset root is resolved relative to the current user's home directory.
ctsrbm_dataset_dir = str(
    pathlib.Path.home() / "data" / "DeepFashion" / "Consumer-to-shop Clothes Retrieval Benchmark"
)
ctsrbm_dataset = datasets.deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(
    ctsrbm_dataset_dir,
    ctsrbm_image_transform,
)

In [6]:
# Display the preset's repr (crop/resize sizes, normalisation stats, interpolation).
ctsrbm_image_transform

ImageClassification(
    crop_size=[256]
    resize_size=[260]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [7]:
# Maps a caller-chosen name to the most recent detached output seen by its hook.
activation = {}


def get_activation(name):
    """Return a forward hook that records the hooked module's output under ``name``.

    The hook has the ``(module, inputs, output)`` signature expected by
    ``register_forward_hook``; only the output is used, and it is stored
    detached from the autograd graph in the global ``activation`` dict.
    """

    def _record_output(_module, _inputs, result):
        activation[name] = result.detach()

    return _record_output

# Attach one forward hook per top-level child of the model, in the same order
# as before. The returned handles are kept (instead of being discarded) so a
# hook can later be detached with ``hook_handles[name].remove()``.
hook_handles = {
    layer_name: getattr(model, layer_name).register_forward_hook(get_activation(layer_name))
    for layer_name in ("features", "norm", "permute", "avgpool", "flatten", "head")
}

<torch.utils.hooks.RemovableHandle at 0x7f67ea4c19d0>

In [8]:
# First field of the first dataset item, with a leading batch axis added via ``None``.
input = ctsrbm_dataset[0][0][None, :]

# This pass only inspects shapes: use inference behaviour (no dropout /
# stochastic depth) and skip building an autograd graph.
model.eval()
with torch.no_grad():
    output = model(input)

In [9]:
# Shapes of the network input and final output, label left-aligned in 20 columns.
for label, tensor in (("input", input), ("output", output)):
    print(f"{label:20s} {tensor.shape}")

print("---")

# Shapes of every intermediate output captured by the forward hooks.
for layer_name, captured in activation.items():
    print(f"{layer_name:20s} {captured.shape}")

input                torch.Size([1, 3, 256, 256])
output               torch.Size([1, 1000])
---
features             torch.Size([1, 8, 8, 768])
norm                 torch.Size([1, 8, 8, 768])
permute              torch.Size([1, 768, 8, 8])
avgpool              torch.Size([1, 768, 1, 1])
flatten              torch.Size([1, 768])
head                 torch.Size([1, 1000])


---

In [10]:
# New Swin V2 Tiny instance for the 512-pixel experiment (no pretrained weights requested).
model = models.swin_v2_t(weights=None)

In [11]:
# Start from the default Swin V2 Tiny preset, then override its resize and crop sizes.
ctsrbm_image_transform = models.Swin_V2_T_Weights.DEFAULT.transforms()
ctsrbm_image_transform.antialias = True
ctsrbm_image_transform.crop_size = [512]
ctsrbm_image_transform.resize_size = [520]

# Dataset root is resolved relative to the current user's home directory.
ctsrbm_dataset_dir = str(
    pathlib.Path.home() / "data" / "DeepFashion" / "Consumer-to-shop Clothes Retrieval Benchmark"
)
ctsrbm_dataset = datasets.deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(
    ctsrbm_dataset_dir,
    ctsrbm_image_transform,
)

In [12]:
# Display the preset's repr to confirm the overridden crop/resize sizes took effect.
ctsrbm_image_transform

ImageClassification(
    crop_size=[512]
    resize_size=[520]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [13]:
# Reset the capture store: name -> latest detached output recorded by a hook.
activation = {}


def get_activation(name):
    """Create a forward hook that saves the hooked module's output as ``activation[name]``.

    The saved tensor is detached from the autograd graph. The module and
    input arguments passed to the hook are ignored.
    """

    def _capture(_module, _inputs, layer_output):
        activation[name] = layer_output.detach()

    return _capture

# Attach one forward hook per top-level child of the model, in the same order
# as before. The returned handles are kept (instead of being discarded) so a
# hook can later be detached with ``hook_handles[name].remove()``.
hook_handles = {
    layer_name: getattr(model, layer_name).register_forward_hook(get_activation(layer_name))
    for layer_name in ("features", "norm", "permute", "avgpool", "flatten", "head")
}

<torch.utils.hooks.RemovableHandle at 0x7f67d5afc040>

In [14]:
# First field of the first dataset item, with a leading batch axis added via ``None``.
input = ctsrbm_dataset[0][0][None, :]

# This pass only inspects shapes: use inference behaviour (no dropout /
# stochastic depth) and skip building an autograd graph.
model.eval()
with torch.no_grad():
    output = model(input)

In [15]:
# Shapes of the network input and final output, label left-aligned in 20 columns.
for label, tensor in (("input", input), ("output", output)):
    print(f"{label:20s} {tensor.shape}")

print("---")

# Shapes of every intermediate output captured by the forward hooks.
for layer_name, captured in activation.items():
    print(f"{layer_name:20s} {captured.shape}")

input                torch.Size([1, 3, 512, 512])
output               torch.Size([1, 1000])
---
features             torch.Size([1, 16, 16, 768])
norm                 torch.Size([1, 16, 16, 768])
permute              torch.Size([1, 768, 16, 16])
avgpool              torch.Size([1, 768, 1, 1])
flatten              torch.Size([1, 768])
head                 torch.Size([1, 1000])


---

In [16]:
# New Swin V2 Tiny instance for the input-size sweep (no pretrained weights requested).
model = models.swin_v2_t(weights=None)

In [17]:
# Start from the default Swin V2 Tiny preset, then override its resize and crop sizes.
ctsrbm_image_transform = models.Swin_V2_T_Weights.DEFAULT.transforms()
ctsrbm_image_transform.antialias = True
ctsrbm_image_transform.crop_size = [768]
ctsrbm_image_transform.resize_size = [780]

# Dataset root is resolved relative to the current user's home directory.
ctsrbm_dataset_dir = str(
    pathlib.Path.home() / "data" / "DeepFashion" / "Consumer-to-shop Clothes Retrieval Benchmark"
)
ctsrbm_dataset = datasets.deep_fashion_ctsrbm.ConsToShopClothRetrBmkImageLoader(
    ctsrbm_dataset_dir,
    ctsrbm_image_transform,
)

In [18]:
# Display the preset's repr to confirm the overridden crop/resize sizes took effect.
ctsrbm_image_transform

ImageClassification(
    crop_size=[768]
    resize_size=[780]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [19]:
# Reset the capture store: name -> latest detached output recorded by a hook.
activation = {}


def get_activation(name):
    """Build a forward hook that writes the hooked module's output to ``activation[name]``.

    Only the third hook argument (the module output) is used; it is stored
    detached from the autograd graph.
    """

    def _save_detached(_module, _inputs, produced):
        activation[name] = produced.detach()

    return _save_detached

# Only the ``permute`` output is needed for the size sweep. Keep the handle
# (instead of discarding it) so the hook can be detached later with
# ``hook_handles["permute"].remove()``.
hook_handles = {
    "permute": model.permute.register_forward_hook(get_activation("permute")),
}

<torch.utils.hooks.RemovableHandle at 0x7f67ea44cd90>

In [21]:
# Sweep square input sizes and print each size at which the spatial extent of
# the ``permute`` output grows.
last_perm_size = 0

# Fetch the dataset item once instead of on every iteration.
# NOTE(review): assumes indexing the dataset is deterministic; only shapes are
# inspected here, so the pixel content does not affect the printed result.
image = ctsrbm_dataset[0][0]

# Shape inspection only: inference behaviour, no autograd graph per pass.
model.eval()
with torch.no_grad():
    for size in range(4, 768 + 1):

        # Top-left ``size`` x ``size`` crop with a leading batch axis.
        input = image[None, :, :size, :size]
        output = model(input)

        # Direct lookup: raises KeyError if the hook is missing rather than
        # silently reusing a stale value.
        perm_size = activation["permute"].size(3)

        if last_perm_size < perm_size:
            print(size, perm_size)
            last_perm_size = perm_size

4 1
36 2
68 3
100 4
132 5
164 6
196 7
228 8
260 9
292 10
324 11
356 12
388 13
420 14
452 15
484 16
516 17
548 18
580 19
612 20
644 21
676 22
708 23
740 24


In [22]:
# Compare the measured ``permute`` output size with a closed-form guess for
# every input size; print only the sizes where the two disagree.

# Fetch the dataset item once instead of on every iteration.
# NOTE(review): assumes indexing the dataset is deterministic; only shapes are
# inspected here, so the pixel content does not affect the printed result.
image = ctsrbm_dataset[0][0]

# Shape inspection only: inference behaviour, no autograd graph per pass.
model.eval()
with torch.no_grad():
    for size in range(4, 768 + 1):

        # Top-left ``size`` x ``size`` crop with a leading batch axis.
        input = image[None, :, :size, :size]
        output = model(input)

        # Direct lookup: raises KeyError if the hook is missing rather than
        # silently reusing a stale value.
        perm_size = activation["permute"].size(3)

        # Guess: size 1 at input 4, growing by one for every further 32 pixels.
        theo_perm_size = ((size - 4) // 32) + 1

        if perm_size != theo_perm_size:
            print(size, perm_size, theo_perm_size)