In [2]:
import torch
import numpy as np
import torch.nn as nn
from torchsummary import summary

In [7]:
class CNN_3D(nn.Module):
    """3D convolutional autoencoder with a small fully connected latent code.

    The encoder runs two ``Conv3d -> MaxPool3d -> ReLU`` stages, flattens the
    result and projects it to ``latent_dim`` values passed through a sigmoid.
    The decoder projects the latent code back to a feature volume and
    upsamples it with two transposed convolutions.

    The ``Linear``/``Unflatten`` sizes are hard-coded for input volumes whose
    spatial size is 224 x 48 x 96.

    Args:
        input_channels: number of channels of the input volume. The
            reconstruction returned by ``forward`` has the same number of
            channels.
        name: model name, used to build ``save_path``.
        version_str: version tag, used to build ``save_path``.
    """

    def __init__(self, input_channels: int = 8, name: str = '3dcnn', version_str: str = 'v0.0.0'):
        super().__init__()
        # model information and metadata
        self.name = name
        self.version_num = version_str
        self.save_path = f'models/cnn/saves/{self.name}_{self.version_num}.pth'
        # middle - most latent dimension
        self.latent_dim = 3 * 5 * 5

        # total data is shape (batch_size, input_channels, 224, 48, 96)

        # Each encoder stage (conv with padding=2, then 4x4x4 max-pool with
        # stride 2) halves every spatial dimension, so after two stages the
        # 4-channel feature volume is 56 x 12 x 24.
        flat_features = 4 * 224 * 48 * 96 // (4 * 4 * 4)

        # encoder stack
        self.encoder = nn.Sequential(
            nn.Conv3d(
                in_channels=input_channels,
                out_channels=4,
                kernel_size=(3, 3, 3),
                stride=1,
                padding=2
            ),
            nn.MaxPool3d((4, 4, 4), stride=(2, 2, 2)),
            nn.ReLU(),
            nn.Conv3d(
                in_channels=4,
                out_channels=4,
                kernel_size=(3, 3, 3),
                stride=1,
                padding=2
            ),
            nn.MaxPool3d((4, 4, 4), stride=(2, 2, 2)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flat_features, self.latent_dim),
            nn.Sigmoid(),
        )

        # decoder stack
        self.decoder = nn.Sequential(
            nn.Linear(self.latent_dim, flat_features),
            nn.Unflatten(1, (4, (224 // 4), (48 // 4), (96 // 4))),
            # kernel 4 / stride 2 / padding 1 doubles every spatial dimension
            nn.ConvTranspose3d(
                in_channels=4,
                out_channels=4,
                kernel_size=(4, 4, 4),
                stride=2,
                padding=1
            ),
            nn.ReLU(),
            # The reconstruction must have as many channels as the input, so
            # follow ``input_channels`` instead of a hard-coded 8.
            nn.ConvTranspose3d(
                in_channels=4,
                out_channels=input_channels,
                kernel_size=(4, 4, 4),
                stride=2,
                padding=1
            ),
        )

    # forward pass on x
    def forward(self, x):
        """Encode ``x`` to the latent code and decode it back to a volume."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x
        

In [8]:
# Instantiate the 3D autoencoder on the GPU when one is available and print a
# per-layer summary for one 8-channel 224 x 48 x 96 volume.
batch_size = 2  # not used in this cell
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = CNN_3D()
model.to(device)  # nn.Module.to moves the parameters in place
summary(model, input_size=(8, 224, 48, 96))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 4, 226, 50, 98]             868
         MaxPool3d-2       [-1, 4, 112, 24, 48]               0
              ReLU-3       [-1, 4, 112, 24, 48]               0
            Conv3d-4       [-1, 4, 114, 26, 50]             436
         MaxPool3d-5        [-1, 4, 56, 12, 24]               0
              ReLU-6        [-1, 4, 56, 12, 24]               0
           Flatten-7                [-1, 64512]               0
            Linear-8                   [-1, 75]       4,838,475
           Sigmoid-9                   [-1, 75]               0
           Linear-10                [-1, 64512]       4,902,912
        Unflatten-11        [-1, 4, 56, 12, 24]               0
  ConvTranspose3d-12       [-1, 4, 112, 24, 48]           1,028
             ReLU-13       [-1, 4, 112, 24, 48]               0
  ConvTranspose3d-14       [-1, 8, 224,

In [1]:
class CNN_DEPTH(nn.Module):
    """2D convolutional autoencoder built from grouped 3x3 + pointwise 1x1 convolutions.

    Encoder: six stages of ``3x3 grouped Conv2d -> 1x1 Conv2d -> GELU``. The
    first two stages are unpadded with stride 1 (each trims 2 pixels per
    spatial dimension); the last four use stride 2 with padding 1 (each halves
    the spatial size, rounding up).

    Bottleneck: flattens a 1024 x 8 x 8 feature map, squeezes it through a
    4096-unit layer and restores the 1024 x 8 x 8 shape. The encoder output
    must therefore be exactly 8 x 8, e.g. for a 132 x 132 input.

    Decoder: four ``ConvTranspose2d`` layers (kernel 4, stride 2, padding 1)
    that each double the spatial size, giving a 128 x 128 output, followed by
    a 1x1 convolution back to ``input_channels`` channels.

    NOTE(review): the decoder output (128 x 128) is smaller than a 132 x 132
    input because of the two unpadded encoder convolutions - confirm whether
    that is intended.

    Args:
        input_channels: number of channels of the input image; the output has
            the same number of channels.
    """

    def __init__(self, input_channels: int = 8):
        super().__init__()
        self.encoder = nn.Sequential(
            # depthwise 3x3 (one filter per input channel) + pointwise 1x1
            nn.Conv2d(in_channels=input_channels, out_channels=input_channels, kernel_size=3, groups=input_channels),
            nn.Conv2d(in_channels=input_channels, out_channels=32, kernel_size=1),
            nn.GELU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, groups=32),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1),
            nn.GELU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1),
            nn.GELU(),
            # NOTE(review): from here on groups=64 is smaller than in_channels,
            # so these are grouped (2/4/8 channels per group) rather than
            # fully depthwise convolutions - confirm this is intended.
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1),
            nn.GELU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1),
            nn.GELU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1),
            nn.GELU(),
            )

        self.bottleneck = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 8 * 8, 4096),
            nn.GELU(),
            nn.Linear(4096, 1024 * 8 * 8),
            nn.Unflatten(1, (1024, 8, 8)),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            # project back to the input's channel count (8 by default)
            nn.Conv2d(in_channels=64, out_channels=input_channels, kernel_size=1)
            )

    def forward(self, x):
        """Run ``x`` through the encoder, bottleneck and decoder in turn."""
        x = self.encoder(x)
        x = self.bottleneck(x)
        x = self.decoder(x)
        return x

NameError: name 'nn' is not defined

In [None]:
# Instantiate the 2D CNN_DEPTH model and print a per-layer summary.
batch_size = 2  # not used in this cell
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN_DEPTH().to(device)
# The 2D model takes (channels, height, width) samples. A 132 x 132 image is
# reduced to the 8 x 8 map its bottleneck (1024 * 8 * 8) requires:
# 132 -> 130 -> 128 -> 64 -> 32 -> 16 -> 8. The previous 4-element
# input_size described a 3D volume, which Conv2d rejects.
summary(model, input_size=(8, 132, 132))

In [3]:
class CNN_DEPTH(nn.Module):
    """3D convolutional autoencoder built from grouped 3x3x3 + pointwise 1x1x1 convolutions.

    Encoder: six stages of ``3x3x3 grouped Conv3d -> 1x1x1 Conv3d -> GELU``.
    The first two stages are unpadded with stride 1 (each trims 2 voxels per
    spatial dimension); the last four use stride 2 with padding 1 (each halves
    the spatial size, rounding up). A 224 x 48 x 96 volume is reduced to
    1024 x 14 x 3 x 6.

    Bottleneck: flattens that 1024 x 14 x 3 x 6 feature volume, squeezes it
    through a 2048-unit layer and restores the same shape. These sizes are
    hard-coded, so other input sizes need a different bottleneck.

    Decoder: four ``ConvTranspose3d`` layers (kernel 4, stride 2, padding 1)
    that each double the spatial size (14 x 3 x 6 -> 224 x 48 x 96), followed
    by a 1x1x1 convolution back to ``input_channels`` channels.

    Args:
        input_channels: number of channels of the input volume; the output has
            the same number of channels.
    """

    def __init__(self, input_channels: int = 8):
        super().__init__()
        self.encoder = nn.Sequential(
            # depthwise 3x3x3 (one filter per input channel) + pointwise 1x1x1
            nn.Conv3d(in_channels=input_channels, out_channels=input_channels, kernel_size=3, groups=input_channels),
            nn.Conv3d(in_channels=input_channels, out_channels=32, kernel_size=1),
            nn.GELU(),
            nn.Conv3d(in_channels=32, out_channels=32, kernel_size=3, groups=32),
            nn.Conv3d(in_channels=32, out_channels=64, kernel_size=1),
            nn.GELU(),
            nn.Conv3d(in_channels=64, out_channels=64, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv3d(in_channels=64, out_channels=128, kernel_size=1),
            nn.GELU(),
            # NOTE(review): from here on groups=64 is smaller than in_channels,
            # so these are grouped (2/4/8 channels per group) rather than
            # fully depthwise convolutions - confirm this is intended.
            nn.Conv3d(in_channels=128, out_channels=128, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv3d(in_channels=128, out_channels=256, kernel_size=1),
            nn.GELU(),
            nn.Conv3d(in_channels=256, out_channels=256, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv3d(in_channels=256, out_channels=512, kernel_size=1),
            nn.GELU(),
            nn.Conv3d(in_channels=512, out_channels=512, kernel_size=3, stride= 2, padding=1, groups=64),
            nn.Conv3d(in_channels=512, out_channels=1024, kernel_size=1),
            nn.GELU(),
            )

        self.bottleneck = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 14 * 3 * 6, 2048),
            nn.GELU(),
            nn.Linear(2048, 1024 * 14 * 3 * 6),
            nn.Unflatten(1, (1024, 14, 3, 6)),
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(in_channels=1024, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose3d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose3d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            nn.ConvTranspose3d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.GELU(),
            # project back to the input's channel count (8 by default)
            nn.Conv3d(in_channels=64, out_channels=input_channels, kernel_size=1)
            )

    def forward(self, x):
        """Run ``x`` through the encoder, bottleneck and decoder in turn."""
        x = self.encoder(x)
        x = self.bottleneck(x)
        x = self.decoder(x)
        return x

In [4]:
# Instantiate the 3D CNN_DEPTH model on the GPU when one is available and
# print a per-layer summary for one 8-channel 224 x 48 x 96 volume.
batch_size = 2  # not used in this cell
device_name = "cpu"
if torch.cuda.is_available():
    device_name = "cuda"
device = torch.device(device_name)
model = CNN_DEPTH()
model.to(device)  # nn.Module.to moves the parameters in place
summary(model, input_size=(8, 224, 48, 96))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 8, 222, 46, 94]             224
            Conv3d-2      [-1, 32, 222, 46, 94]             288
              GELU-3      [-1, 32, 222, 46, 94]               0
            Conv3d-4      [-1, 32, 220, 44, 92]             896
            Conv3d-5      [-1, 64, 220, 44, 92]           2,112
              GELU-6      [-1, 64, 220, 44, 92]               0
            Conv3d-7      [-1, 64, 110, 22, 46]           1,792
            Conv3d-8     [-1, 128, 110, 22, 46]           8,320
              GELU-9     [-1, 128, 110, 22, 46]               0
           Conv3d-10      [-1, 128, 55, 11, 23]           7,040
           Conv3d-11      [-1, 256, 55, 11, 23]          33,024
             GELU-12      [-1, 256, 55, 11, 23]               0
           Conv3d-13       [-1, 256, 28, 6, 12]          27,904
           Conv3d-14       [-1, 512, 28