In [30]:
import torch
from torch import nn

In [31]:
import torch
import torch.nn as nn

class FrameDiscriminator(nn.Module):
    """2-D convolutional discriminator producing one sigmoid score per image.

    Architecture: a 4x4 stride-2 input convolution, ``blocks`` stages of
    4x4 stride-2 Conv -> BatchNorm -> LeakyReLU whose widths are
    ``hidden_units * 2 ** (increment + i)``, then a 4x4 stride-1 convolution,
    a flatten, a linear layer with one output, and a sigmoid.

    Args:
        input_shape: Number of channels of the input images.
        hidden_units: Output channels of the input convolution.
        output_unit: Output channels of the final convolution.
        blocks: Number of downsampling stages.
        increment: Exponent offset used for the stage widths.
        image_size: Height/width of the (square) input images. When ``None``
            (default) the final feature map is taken to be 3x3, which is what
            a three-stage model yields for 128x128 inputs.

    Raises:
        ValueError: If ``image_size`` is given but is too small to survive all
            un-padded convolutions.
    """

    _KERNEL_SIZE = 4

    def __init__(self, input_shape: int, hidden_units: int, output_unit: int, blocks: int,
                 increment: int = 1, image_size=None):
        super().__init__()
        self.input_layer = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, out_channels=hidden_units,
                      kernel_size=self._KERNEL_SIZE, stride=2),
            nn.LeakyReLU(0.2)
        )

        # Track the running channel count instead of re-deriving it from the
        # formula, so the output layer is wired correctly even when blocks == 0.
        self.blocks = nn.ModuleList()
        channels = hidden_units
        for i in range(blocks):
            out_channels = hidden_units * (2 ** (increment + i))
            self.blocks.append(nn.Sequential(
                nn.Conv2d(in_channels=channels, out_channels=out_channels,
                          kernel_size=self._KERNEL_SIZE, stride=2),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(0.2)
            ))
            channels = out_channels

        # Side length of the feature map that reaches the linear layer.
        if image_size is None:
            final_size = 3
        else:
            final_size = self._final_feature_size(image_size, blocks)

        self.output_layer = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=output_unit,
                      kernel_size=self._KERNEL_SIZE, stride=1),
            nn.Flatten(),
            # Flatten yields output_unit * H * W features, not just H * W.
            nn.Linear(in_features=output_unit * final_size * final_size, out_features=1),
            nn.Sigmoid()
        )

    @classmethod
    def _final_feature_size(cls, image_size: int, blocks: int) -> int:
        """Return the side length left after every un-padded convolution."""
        size = image_size
        # Input conv and each block use stride 2; the output conv uses stride 1.
        for stride in [2] * (blocks + 1) + [1]:
            if size < cls._KERNEL_SIZE:
                raise ValueError(
                    f"image_size={image_size} is too small for {blocks} blocks: "
                    f"feature map shrank to {size}x{size} before a "
                    f"{cls._KERNEL_SIZE}x{cls._KERNEL_SIZE} convolution."
                )
            size = (size - cls._KERNEL_SIZE) // stride + 1
        return size

    def forward(self, x):
        """Map images ``(batch, input_shape, H, W)`` to scores ``(batch, 1)``."""
        x = self.input_layer(x)
        for block in self.blocks:
            x = block(x)
        return self.output_layer(x)

# Example: RGB input, 64 base filters, three downsampling blocks
Frame_model = FrameDiscriminator(
    input_shape=3,
    hidden_units=64,
    output_unit=1,
    blocks=3,
)


In [32]:
Frame_model

FrameDiscriminator(
  (input_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2))
    (1): LeakyReLU(negative_slope=0.2)
  )
  (blocks): ModuleList(
    (0): Sequential(
      (0): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (1): Sequential(
      (0): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (2): Sequential(
      (0): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2))
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
  )
  (output_layer): Sequential(
    (0): Conv2d(512, 1, kernel_size=(4, 4), stride=(1, 1))
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_feat

In [33]:
class SequenceDescriminator(nn.Module):
    """3-D convolutional discriminator producing one sigmoid score per clip.

    Architecture: a padded 3x3x3 input convolution, ``blocks`` stages of
    padded 3x3x3 Conv3d -> BatchNorm3d -> LeakyReLU whose widths are
    ``hidden_units * 2 ** (increment + i)``, then an un-padded 3x3x3
    convolution, a flatten, a linear layer with one output, and a sigmoid.
    All convolutions before the output layer use stride 1 with padding 1, so
    only the output convolution shrinks the volume (by 2 along each axis).

    Args:
        input_shape: Number of channels of the input clip.
        hidden_units: Output channels of the input convolution.
        output_unit: Output channels of the final convolution.
        blocks: Number of Conv3d stages.
        increment: Exponent offset used for the stage widths.
        seq_len: Number of frames (depth) of the input clip. Default 5.
        image_size: Frame size, either an int for square frames or a
            ``(height, width)`` pair. Default 128.

    Raises:
        ValueError: If depth, height or width is smaller than the 3x3x3
            output kernel.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_unit: int, blocks: int,
                 increment: int = 1, seq_len: int = 5, image_size=128):
        super().__init__()
        self.input_layer = nn.Sequential(
            nn.Conv3d(in_channels=input_shape, out_channels=hidden_units, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2)
        )

        # Track the running channel count so the output layer is wired
        # correctly even when blocks == 0.
        self.blocks = nn.ModuleList()
        channels = hidden_units
        for i in range(blocks):
            out_channels = hidden_units * (2 ** (increment + i))
            self.blocks.append(nn.Sequential(
                nn.Conv3d(in_channels=channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm3d(out_channels),
                nn.LeakyReLU(0.2)
            ))
            channels = out_channels

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size
        # The un-padded 3x3x3 output convolution removes 2 from every axis.
        out_depth, out_height, out_width = seq_len - 2, height - 2, width - 2
        if min(out_depth, out_height, out_width) < 1:
            raise ValueError(
                f"seq_len={seq_len} and image_size={image_size} must each be at least 3 "
                "for the un-padded 3x3x3 output convolution."
            )

        self.output_layer = nn.Sequential(
            nn.Conv3d(in_channels=channels, out_channels=output_unit, kernel_size=3, stride=1, padding=0),
            nn.Flatten(),
            # Defaults reproduce the previous hard-coded 3 * 126 * 126.
            nn.Linear(in_features=output_unit * out_depth * out_height * out_width, out_features=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Map clips ``(batch, input_shape, seq_len, H, W)`` to scores ``(batch, 1)``."""
        x = self.input_layer(x)
        for block in self.blocks:
            x = block(x)
        return self.output_layer(x)

# Example: RGB clips, 64 base filters, three Conv3d blocks
Sequence_Model = SequenceDescriminator(
    input_shape=3,
    hidden_units=64,
    output_unit=1,
    blocks=3,
)

In [34]:
Sequence_Model

SequenceDescriminator(
  (input_layer): Sequential(
    (0): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): LeakyReLU(negative_slope=0.2)
  )
  (blocks): ModuleList(
    (0): Sequential(
      (0): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (1): Sequential(
      (0): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (2): Sequential(
      (0): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
  )
  (output_layer): Sequential(
    (0): Conv3d(512, 

In [35]:
import torch
import torch.nn as nn

class ConvLSTM2DCell(nn.Module):
    """Single convolutional LSTM step.

    One convolution over the channel-wise concatenation of the input and the
    previous hidden state produces all four gate pre-activations at once.

    Args:
        input_channels: Channels of the input tensor ``x``.
        hidden_channels: Channels of the hidden and cell states.
        kernel_size: Integer kernel size; padding is ``kernel_size // 2``.
        bias: Whether the gate convolution has a bias term.
    """

    def __init__(self, input_channels, hidden_channels, kernel_size, bias=True):
        super().__init__()

        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.padding = kernel_size // 2
        self.bias = bias

        # Output is 4 * hidden_channels wide: one slice per gate.
        self.conv = nn.Conv2d(
            input_channels + hidden_channels,
            4 * hidden_channels,
            kernel_size,
            padding=self.padding,
            bias=bias,
        )

    def forward(self, x, h, c):
        """Advance one step and return ``(h_next, c_next)``."""
        gates = self.conv(torch.cat((x, h), dim=1))
        # Slice order along the channel axis: input, forget, output, candidate.
        in_gate, forget_gate, out_gate, candidate = gates.split(self.hidden_channels, dim=1)

        c_next = torch.sigmoid(forget_gate) * c + torch.sigmoid(in_gate) * torch.tanh(candidate)
        h_next = torch.sigmoid(out_gate) * torch.tanh(c_next)
        return h_next, c_next

class ConvLSTM2D(nn.Module):
    """Stack of ``ConvLSTM2DCell`` layers unrolled over a sequence.

    Args:
        input_channels: Channels of the input frames.
        hidden_channels: Sequence of hidden sizes, indexed per layer.
        kernel_size: Kernel size forwarded to every cell.
        num_layers: Number of stacked cells.
        bias: Whether the cells' convolutions use a bias.
        batch_first: If ``False`` the input's first two axes are swapped
            before processing.

    ``forward`` returns ``(outputs, (h, c))``: ``outputs`` is a list with one
    tensor per layer, each stacked along dim 1, and ``(h, c)`` is the state of
    the last layer after the last step. The outputs keep the batch-first
    layout regardless of ``batch_first``.
    """

    def __init__(self, input_channels, hidden_channels, kernel_size, num_layers, bias=True, batch_first=False):
        super().__init__()

        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first

        # Layer 0 consumes the raw input; deeper layers consume the previous
        # layer's hidden state.
        layer_cells = []
        for layer_idx in range(num_layers):
            if layer_idx == 0:
                in_channels = input_channels
            else:
                in_channels = hidden_channels[layer_idx - 1]
            layer_cells.append(
                ConvLSTM2DCell(in_channels, hidden_channels[layer_idx], kernel_size, bias)
            )
        self.cells = nn.ModuleList(layer_cells)

    def forward(self, x, hidden_state=None):
        """Run every layer over the whole sequence."""
        if not self.batch_first:
            # Swap the first two axes so the rest of the method can index
            # dim 0 as batch and dim 1 as time.
            x = x.permute(1, 0, 2, 3, 4)

        batch, steps, _, height, width = x.size()

        if hidden_state is None:
            hidden_state = self._init_hidden(batch, height, width)

        layer_input = x
        outputs = []
        for layer_idx in range(self.num_layers):
            cell = self.cells[layer_idx]
            h, c = hidden_state[layer_idx]
            per_step = []
            for t in range(steps):
                h, c = cell(layer_input[:, t, :, :, :], h, c)
                per_step.append(h)
            # This layer's hidden sequence is the next layer's input.
            layer_input = torch.stack(per_step, dim=1)
            outputs.append(layer_input)

        return outputs, (h, c)

    def _init_hidden(self, batch_size, height, width):
        """Return zero ``(h, c)`` pairs, one per entry of ``hidden_channels``."""
        states = []
        for hidden_dim in self.hidden_channels:
            # Allocate on the same device as the first cell's weights.
            device = self.cells[0].conv.weight.device
            h0 = torch.zeros(batch_size, hidden_dim, height, width, device=device)
            c0 = torch.zeros(batch_size, hidden_dim, height, width, device=device)
            states.append((h0, c0))
        return states

class LSTMSequenceDiscriminator(nn.Module):
    """Sequence discriminator built from ConvLSTM2D blocks.

    Pipeline: a 4x4 stride-2 convolution applied to every frame, ``blocks``
    stages of ConvLSTM2D -> BatchNorm2d (per time step) -> LeakyReLU whose
    widths are ``hidden_units * 2 ** (increment + i)``, then a 4x4
    convolution and a sigmoid applied to the last time step only. There is no
    flatten, so the result is a map of shape ``(batch, output_unit, H', W')``.

    Args:
        input_shape: Number of channels of each input frame.
        hidden_units: Output channels of the input convolution.
        output_unit: Output channels of the final convolution.
        blocks: Number of ConvLSTM2D stages.
        increment: Exponent offset used for the stage widths.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_unit: int, blocks: int, increment: int = 1):
        super().__init__()

        # Input layer (applied frame by frame)
        self.input_layer = nn.Sequential(
            nn.Conv2d(in_channels=input_shape, out_channels=hidden_units, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2)
        )

        # ConvLSTM2D stages. Each is kept as a ModuleList because the three
        # parts are called differently in forward. The running channel count
        # is tracked so the output layer matches even when blocks == 0.
        self.convlstm_blocks = nn.ModuleList()
        channels = hidden_units
        for i in range(blocks):
            out_channels = hidden_units * (2 ** (increment + i))
            self.convlstm_blocks.append(nn.ModuleList([
                ConvLSTM2D(input_channels=channels, hidden_channels=[out_channels], kernel_size=3, num_layers=1, batch_first=True),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(0.2)
            ]))
            channels = out_channels

        # Output layer
        self.output_layer = nn.Sequential(
            nn.Conv2d(in_channels=channels, out_channels=output_unit, kernel_size=4, stride=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Score a batch of sequences shaped ``(batch, time, channels, H, W)``."""
        batch_size, seq_len, channels, height, width = x.size()

        # Fold time into the batch axis for the per-frame convolution.
        # reshape (not view) so non-contiguous inputs, e.g. permuted tensors, work.
        x = x.reshape(batch_size * seq_len, channels, height, width)
        x = self.input_layer(x)
        x = x.reshape(batch_size, seq_len, *x.shape[1:])

        for conv_lstm, batch_norm, leaky_relu in self.convlstm_blocks:
            layer_outputs, _ = conv_lstm(x)
            # ConvLSTM2D returns a list with one (batch, time, C, H, W) tensor
            # per layer; the last entry is the top layer's sequence.
            if isinstance(layer_outputs, (list, tuple)):
                x = layer_outputs[-1]
            else:
                x = layer_outputs

            # BatchNorm2d expects 4-D input, so normalise each time step separately.
            x = torch.stack([batch_norm(step) for step in x.unbind(dim=1)], dim=1)
            x = leaky_relu(x)

        # Only the final time step feeds the output layer.
        return self.output_layer(x[:, -1])





# Example: RGB frames, 64 base filters, three ConvLSTM2D blocks
LSTM_Sequence_Model = LSTMSequenceDiscriminator(
    input_shape=3,
    hidden_units=64,
    output_unit=1,
    blocks=3,
)


In [7]:
LSTM_Sequence_Model

LSTMSequenceDiscriminator(
  (input_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.2)
  )
  (convlstm_blocks): ModuleList(
    (0): ModuleList(
      (0): ConvLSTM2D(
        (cells): ModuleList(
          (0): ConvLSTM2DCell(
            (conv): Conv2d(192, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
        )
      )
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (1): ModuleList(
      (0): ConvLSTM2D(
        (cells): ModuleList(
          (0): ConvLSTM2DCell(
            (conv): Conv2d(384, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
        )
      )
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.2)
    )
    (2): ModuleList(
      (0): ConvLSTM2D(
        (cells): ModuleList

In [29]:
batch_size = 1
seq_len = 5  # Sequence length (depth axis of the Conv3d input)
channels = 3  # RGB channels
height = 128
width = 128

# Random clip laid out as [batch, channels, depth, height, width]
x = torch.rand(batch_size, channels, seq_len, height, width)

# Smoke-test forward pass. no_grad skips building the autograd graph, which
# would otherwise keep every 5x128x128 activation (up to 512 channels) alive.
with torch.no_grad():
    output = Sequence_Model(x)
print(torch.round(output).item())  # Sigmoid score rounded to 0.0 or 1.0

After input layer: torch.Size([1, 64, 5, 128, 128])
After block: torch.Size([1, 128, 5, 128, 128])
After block: torch.Size([1, 256, 5, 128, 128])
After block: torch.Size([1, 512, 5, 128, 128])
After output layer: torch.Size([1, 1])
1.0


In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvLSTMCell(nn.Module):
    """One convolutional LSTM step taking the state as an ``(h, c)`` pair.

    Args:
        input_channels: Channels of the input tensor ``x``.
        hidden_channels: Channels of the hidden and cell states.
        kernel_size: Kernel size of the gate convolution.
        stride: Stride of the gate convolution.
        padding: Padding of the gate convolution.
    """

    def __init__(self, input_channels, hidden_channels, kernel_size, stride=1, padding=1):
        super().__init__()

        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride

        # A single convolution yields all four gates (4 * hidden_channels).
        self.conv = nn.Conv2d(
            input_channels + hidden_channels,
            4 * hidden_channels,
            kernel_size,
            stride=stride,
            padding=padding,
        )

    def forward(self, x, hidden):
        """Return the next ``(h, c)`` given input ``x`` and previous ``(h, c)``.

        Raises:
            ValueError: If ``x`` and ``h`` differ in their trailing
                (spatial) dimensions.
        """
        h_prev, c_prev = hidden

        # x and h are concatenated along channels, so everything after the
        # channel axis must agree.
        x_spatial, h_spatial = x.shape[2:], h_prev.shape[2:]
        if x_spatial != h_spatial:
            raise ValueError(f"Spatial dimensions of x {x_spatial} and h {h_spatial} must match.")

        gates = self.conv(torch.cat((x, h_prev), dim=1))
        # Chunk order along the channel axis: input, forget, output, candidate.
        in_gate, forget_gate, out_gate, candidate = gates.chunk(4, dim=1)

        c_next = torch.sigmoid(forget_gate) * c_prev + torch.sigmoid(in_gate) * torch.tanh(candidate)
        h_next = torch.sigmoid(out_gate) * torch.tanh(c_next)
        return h_next, c_next

class ConvLSTM(nn.Module):
    """Stack of ``ConvLSTMCell`` layers that returns the final hidden state.

    Args:
        input_channels: Channels of the input frames.
        hidden_channels: Channels of every layer's hidden and cell state.
        kernel_size: Kernel size of the gate convolutions (int or pair). Odd
            sizes are needed so the padded convolution preserves the
            spatial size of the state.
        num_layers: Number of stacked cells.
    """

    def __init__(self, input_channels, hidden_channels, kernel_size, num_layers):
        super().__init__()

        self.num_layers = num_layers

        # Padding is derived from the kernel so the conv output matches the
        # state's spatial size; a fixed padding of 1 only does so for 3x3.
        if isinstance(kernel_size, int):
            padding = kernel_size // 2
        else:
            padding = tuple(k // 2 for k in kernel_size)

        self.cells = nn.ModuleList([
            ConvLSTMCell(input_channels if i == 0 else hidden_channels,
                         hidden_channels,
                         kernel_size,
                         padding=padding)
            for i in range(num_layers)
        ])

    def forward(self, x):
        """Process ``x`` of shape ``(batch, time, channels, H, W)``.

        Returns:
            The last layer's hidden state after the final time step, shaped
            ``(batch, hidden_channels, H, W)``.
        """
        batch_size, seq_len, _, height, width = x.size()

        # Zero states created directly on the input's device with its dtype.
        hidden = [
            (torch.zeros(batch_size, cell.hidden_channels, height, width, device=x.device, dtype=x.dtype),
             torch.zeros(batch_size, cell.hidden_channels, height, width, device=x.device, dtype=x.dtype))
            for cell in self.cells
        ]

        for t in range(seq_len):
            layer_input = x[:, t]
            for layer_idx, cell in enumerate(self.cells):
                hidden[layer_idx] = cell(layer_input, hidden[layer_idx])
                # Deeper layers consume the hidden state of the layer below,
                # not the raw frame.
                layer_input = hidden[layer_idx][0]

        return hidden[-1][0]  # Output of the last layer's hidden state

# Example usage: single-layer ConvLSTM on random single-channel clips
if __name__ == "__main__":
    # Model hyper-parameters
    input_channels, hidden_channels = 1, 16
    kernel_size, num_layers = 3, 1

    # Input geometry
    seq_len = 5
    height = width = 64

    model = ConvLSTM(input_channels, hidden_channels, kernel_size, num_layers)

    # Layout: (batch=8, time, channels, height, width)
    input_tensor = torch.randn(8, seq_len, input_channels, height, width)
    output = model(input_tensor)
    print(output.shape)  # Expected output shape: (8, hidden_channels, height, width)


torch.Size([8, 16, 64, 64])
