In [1]:
import os
import torch
import torch.nn as nn
import numpy as np

# GPU
gpu = '1'
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

def convert_bytes(size):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024.0:
            return "%3.2f %s" % (size, x)
        size /= 1024.0
    return size

### Memory analyse

In [2]:
v_size = [512,512,512]
print(v_size, convert_bytes(np.prod(v_size)*4))
ptch_size = [16,16,16]

emb_size_reshape = [int(i/j) for i,j in zip(v_size, ptch_size)] + [np.prod(ptch_size)]
print(emb_size_reshape, convert_bytes(np.prod(emb_size_reshape)*4))
emb_size_flat = [np.prod(emb_size_reshape[:3]), emb_size_reshape[3]]
print(emb_size_flat, convert_bytes(np.prod(emb_size_flat)*4))
att_size = [emb_size_flat[0]]*2
print(att_size, convert_bytes(np.prod(att_size)*4))

[512, 512, 512] 512.00 MB
[32, 32, 32, 4096] 512.00 MB
[32768, 4096] 512.00 MB
[32768, 32768] 4.00 GB


### Transformer test

In [5]:
print('Init',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))
bs = 1
v_size = [bs,512,512,512]
ptch_size = [64]*3
emb_size_reshape = [bs] + [int(i/j) for i,j in zip(v_size[1:], ptch_size)] + [np.prod(ptch_size)]
emb_size_flat = [bs] + [np.prod(emb_size_reshape[1:4]), emb_size_reshape[-1]]
d_model = 4096*2#emb_size_reshape[-1]

x = torch.from_numpy(np.random.rand(*v_size).astype(float)).float().cuda()
print('Input',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))
x = torch.reshape(x, emb_size_reshape)
print('Input reshape',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))
x = torch.reshape(x, emb_size_flat)
print('Input flat',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))
print(x.shape)

lin = nn.Linear(emb_size_flat[-1], d_model).float().cuda()
encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=8)
tr = nn.TransformerEncoder(encoder_layer, num_layers=1).float().cuda()
print('Model',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))

x = lin(x)
y = tr(x)
print('Forward',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))

print('y.shape', y.shape)

Init 4.38 GB 4.26 GB
Input 4.76 GB 4.76 GB
Input reshape 4.76 GB 4.76 GB
Input flat 4.76 GB 4.76 GB
torch.Size([1, 512, 262144])
Model 13.88 GB 13.88 GB
Forward 14.10 GB 9.83 GB
y.shape torch.Size([1, 512, 8192])


In [4]:
patch = [8, 16, 32, 32, 32, 32]
d_ = [1024, 1024, 1024, 2048, 4096, 8192]
mem = [18.56, 2.8, 0.96, 1.34, 2.23, 4.38] #GB

In [6]:
convert_bytes(4*64*512**3)

'32.00 GB'

### UNETR

In [5]:
!git pull

remote: Enumerating objects: 9, done.[K
remote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (1/1), done.[K
remote: Total 5 (delta 4), reused 5 (delta 4), pack-reused 0[K
Dépaquetage des objets: 100% (5/5), 420 octets | 420.00 Kio/s, fait.
Depuis github.com:Myyyr/segmentation3D
   0b46865..b6cd49b  main       -> origin/main
Mise à jour 0b46865..b6cd49b
Fast-forward
 models/mymod/UNETR.py | 2 [32m+[m[31m-[m
 1 file changed, 1 insertion(+), 1 deletion(-)


In [1]:
import os
import torch
import torch.nn as nn
import numpy as np

# GPU
gpu = '1'
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

def convert_bytes(size):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024.0:
            return "%3.2f %s" % (size, x)
        size /= 1024.0
    return size

import models.mymod.UNETR as unetr

In [2]:
bs = 1
input_shape = [512,512,256]
v_size = [bs,1]+ input_shape
filters = [4, 16, 64, 256]
skip_idx = [3,6,9,12]
patch_size=(16,16,16)
n_layers=12
#ptch_size = [64]*3
#d_model = 4096*2#emb_size_reshape[-1]

x = torch.from_numpy(np.random.rand(*v_size).astype(float)).float().cuda()

mod = unetr.UNETR(input_shape=input_shape,filters=filters,patch_size=patch_size, n_layers=n_layers, skip_idx=skip_idx).float().cuda()

UNETR


In [3]:
y = mod(x)
print(y.shape)
print('Forward',convert_bytes(torch.cuda.max_memory_allocated()), convert_bytes(torch.cuda.memory_allocated()))


torch.Size([1, 2, 512, 512, 256])
Forward 18.89 GB 17.34 GB
