In [3]:
import torch

# Create a 5-dimensional tensor, e.g., shape (2, 3, 4, 5, 6)
tensor = torch.randn(2, 3, 4, 5, 6)
print("Original shape:", tensor.shape)

# Specify the position to add the new dimension
position = 1
new_channels = 16

# Add the new dimension with the specified number of channels
new_shape = list(tensor.shape)
new_shape.insert(position, new_channels)

# Expand the tensor to the new shape
# Use unsqueeze and expand to achieve the desired shape
expanded_tensor = tensor.unsqueeze(position).expand(*new_shape)
print("New shape:", expanded_tensor.shape)

Original shape: torch.Size([2, 3, 4, 5, 6])
New shape: torch.Size([2, 16, 3, 4, 5, 6])


In [1]:
import torch

# Example tensor with shape (2, 16, 3, 4, 5, 6)
tensor = torch.randn(2, 16, 3, 4, 5, 6)
print("Original shape:", tensor.shape)

# Sum along the channel dimension (index 1)
summed_tensor = tensor.sum(dim=1)
print("Shape after summing channels:", summed_tensor.shape)

Original shape: torch.Size([2, 16, 3, 4, 5, 6])
Shape after summing channels: torch.Size([2, 3, 4, 5, 6])


In [8]:
import torch
from torch.nn import functional as F

tensor = torch.randn(1, 3, 16,16,16)

print(tensor.shape)
kernel = torch.randn(3,6,2,2,2)
tensor = F.conv_transpose3d(tensor, kernel, stride=2)
print(tensor.shape)

torch.Size([1, 3, 16, 16, 16])
torch.Size([1, 6, 32, 32, 32])


In [4]:
import torch
import math


class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        print(self.a.device)
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = Polynomial3()

model.cuda()



# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x.cuda())

    # Compute and print loss
    loss = criterion(y_pred, y.cuda())
    if t % 100 == 99:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
99 122.9929428100586
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0
cuda:0

In [1]:
from einops import rearrange, repeat
from torch.nn import functional as F
import torch

In [9]:
x = torch.randn(1, 16, 5, 128,128,128).cuda()
print(x.shape)

x = rearrange(x, "b g c h w d -> b (g c) h w d")
print(x.shape)

torch.Size([1, 16, 5, 128, 128, 128])
torch.Size([1, 80, 128, 128, 128])


In [10]:
kernel = torch.randn(10,16, 5, 16, 3,3,3)
print(kernel.shape)

kernel = rearrange(kernel, "o g i g2 h w d -> (o g) (i g2) h w d").cuda()
print(kernel.shape)

x = F.conv3d(x, kernel, padding=1)
print(x.shape)
x = rearrange(x, "b (g c) h w d -> b g c h w d", c=10)
print(x.shape)

torch.Size([10, 16, 5, 16, 3, 3, 3])
torch.Size([160, 80, 3, 3, 3])
torch.Size([1, 160, 128, 128, 128])
torch.Size([1, 16, 10, 128, 128, 128])


In [21]:
orig_weight = torch.randn(10, 5, 16, 3, 3, 3)


group_size = 16
weight = orig_weight.view(1, 5*10, group_size, 3, 3, 3)
print(weight.shape)

weight = repeat(orig_weight, "o i g h w d -> g2 (o i) g h w d", g2=16)
print(weight.shape)

torch.Size([1, 50, 16, 3, 3, 3])
torch.Size([16, 50, 16, 3, 3, 3])


In [23]:
weight = repeat(orig_weight, "o i g h w d -> g2 (o i) g h w d", g2 = 16)
print(weight.shape)

torch.Size([16, 50, 16, 3, 3, 3])


In [25]:
example_tensor = torch.randn(10, 5, 16, 3, 4, 2)

rotated_tensor_right = rearrange(example_tensor, "o i g h w d -> o i g w h d")
print(rotated_tensor.shape) 

torch.Size([10, 5, 16, 4, 3, 2])


In [None]:
def _rot_back_90(self, t):
    t.rot90(k=1, dims=(4,2))

    # rotates kernel towards the right in place
def _rot_right_90(self, t):
    t.rot90(k=1, dims=(3,2))

original_kernel = torch.randn(10, 5, 16, 3, 3, 3)
permutations = []
for i in range(16):


Start with kernel of dimensions
(output_channels, input_channels, h, w, d)

Turn input tensor into dimensions
(output_channels, input_channels, groups, h,w,d)

rotate all the individual group kernels

Then transform into 
(output_channels, (input_channels, * groups), h, w, d)

For input of dimension
(batch, in_channels, groups, h, w, d)
transform into 
(batch, (in_channels * groups), h,w,d)

for input of dimension
(batch, in_channels, h,w,d)

transform into 
(batch, in_channels, groups, h,w,d)
then into 
(batch, in_channels*groups, h,w,d)

output as
(batch, out_channels, groups, h,w,d)

and finally as
(batch, out_channels, h,w,d) by sum reduction along groups dimension


In [34]:
def _rot_back_90(self, t):
    t.rot90(k=1, dims=(2,0))

    # rotates kernel towards the right in place
def _rot_right_90(self, t):
    t.rot90(k=1, dims=(1,0))

def generate_kernel(kernel):
    transformed_kernel = repeat(kernel, "o i h w d -> o i g h w d", g=16)
    print(transformed_kernel.shape)
    for i in range(16):
        print(transformed_kernel[:,:,i,:,:,:].shape)
        # _rot_right_90(transformed_kernel[0,0,i,:,:,:])
        # if i % 4 == 0:
        #     _rot_back_90(transformed_kernel[0,0,i,:,:,:])
        
original_kernel = torch.randn(10, 5, 3, 3, 3)
generate_kernel(original_kernel)

torch.Size([10, 5, 16, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])
torch.Size([10, 5, 3, 3, 3])


In [3]:
def create_rotated_kernels(kernel):
    rotations = []

    for rot_x in range(4):
        for rot_y in range(4):
            rotated_kernel = torch.rot90(kernel, rot_x, [3,4])
            rotated_kernel = torch.rot90(rotated_kernel, rot_y, [2,4])
            rotations.append(rotated_kernel)
    
    return torch.stack(rotations, dim=2)

# kernel = torch.randn(32 * 16, 32, 3,3,3)
# print(create_rotated_kernels(kernel).shape)

In [6]:
expanded_kernel = create_rotated_kernels(kernel)
print(expanded_kernel.shape)

combined_kernel = rearrange(expanded_kernel, "o i g h w d -> o (i g) h w d")
combined_kernel.shape



torch.Size([512, 32, 16, 3, 3, 3])


torch.Size([512, 512, 3, 3, 3])

In [7]:
x = torch.randn(1, 32, 16, 128,128,128).cuda()
x = rearrange(x, "b i g h w d -> b (i g) h w d")
print(x.shape)
combined_kernel = combined_kernel.cuda()
print(combined_kernel.shape)
x = F.conv3d(x, combined_kernel, padding=1)
print(x.shape)

torch.Size([1, 512, 128, 128, 128])
torch.Size([512, 512, 3, 3, 3])
torch.Size([1, 512, 128, 128, 128])


In [8]:
x = rearrange(x, "b (o g) h w d -> b o g h w d", g=16)
x.shape

torch.Size([1, 32, 16, 128, 128, 128])

In [4]:
def all_at_once():
    kernel = torch.randn(32 * 16, 32, 3,3,3)
    expanded_kernel = create_rotated_kernels(kernel)
    print(expanded_kernel.shape)

    combined_kernel = rearrange(expanded_kernel, "o i g h w d -> o (i g) h w d")
    combined_kernel.shape
    x = torch.randn(1, 32, 16, 128,128,128).cuda()
    x = rearrange(x, "b i g h w d -> b (i g) h w d")
    print(x.shape)
    combined_kernel = combined_kernel.cuda()
    print(combined_kernel.shape)
    x = F.conv3d(x, combined_kernel, padding=1)
    print(x.shape)
    x = rearrange(x, "b (o g) h w d -> b o g h w d", g=16)
    return x

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()

initial_mem_allocated = torch.cuda.memory_allocated()
initial_memory_reserved = torch.cuda.memory_reserved()

result = all_at_once()

final_memory_allocated = torch.cuda.memory_allocated()
final_memory_reserved = torch.cuda.memory_reserved()
max_memory_allocated = torch.cuda.max_memory_allocated()

print(initial_mem_allocated)
print(initial_memory_reserved)
print(final_memory_allocated)
print(final_memory_reserved)
print(max_memory_allocated)



torch.Size([512, 32, 16, 3, 3, 3])
torch.Size([1, 512, 128, 128, 128])
torch.Size([512, 512, 3, 3, 3])
torch.Size([1, 512, 128, 128, 128])
0
0
4294967296
8619294720
8619294720


In [1]:
from GroupConv3d import GroupConv3d
import torch

def all_at_once_2():
    layer = GroupConv3d(32, 32).cuda()
    data = torch.randn(1,32,32,128,128,128).cuda()
    data = layer(data)

torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()

initial_mem_allocated = torch.cuda.memory_allocated()
initial_memory_reserved = torch.cuda.memory_reserved()

result = all_at_once_2()

final_memory_allocated = torch.cuda.memory_allocated()
final_memory_reserved = torch.cuda.memory_reserved()
max_memory_allocated = torch.cuda.max_memory_allocated()
print(initial_mem_allocated)
print(initial_memory_reserved)
print(final_memory_allocated)
print(final_memory_reserved)
print(max_memory_allocated)



OutOfMemoryError: CUDA out of memory. Tried to allocate 8.00 GiB. GPU 0 has a total capacity of 11.66 GiB of which 3.08 GiB is free. Including non-PyTorch memory, this process has 8.10 GiB memory in use. Of the allocated memory 8.00 GiB is allocated by PyTorch, and 1.89 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)